#define PETSCMAT_DLL

#include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "src/inline/spops.h"

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each
  processor has an order-N integer array) but access is fast.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap.n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}


#define CHUNKSIZE 15

/*
  The following two macros insert (or add) a single value into the diagonal
  block (a) or the off-diagonal block (b) of the local row, using a binary
  search on the row's sorted column indices and reallocating the row if it
  is already full.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i]  = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
    if (nonew == 1) goto a_noinsert; \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(a,1,nrow1,row,col,rmax1,aa,ai,aj,am,rp1,ap1,aimax,nonew); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i]  = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
    if (nonew == 1) goto b_noinsert; \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(b,1,nrow2,row,col,rmax2,ba,bi,bj,bm,rp2,ap2,bimax,nonew); \
    N = nrow2++ - 1; b->nz++; high2++;\
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt 
row,const PetscScalar v[]) 108 { 109 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 110 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 111 PetscErrorCode ierr; 112 PetscInt l,*garray = mat->garray,diag; 113 114 PetscFunctionBegin; 115 /* code only works for square matrices A */ 116 117 /* find size of row to the left of the diagonal part */ 118 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 119 row = row - diag; 120 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 121 if (garray[b->j[b->i[row]+l]] > diag) break; 122 } 123 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 124 125 /* diagonal part */ 126 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 127 128 /* right of diagonal part */ 129 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 130 PetscFunctionReturn(0); 131 } 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "MatSetValues_MPIAIJ" 135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 136 { 137 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 138 PetscScalar value; 139 PetscErrorCode ierr; 140 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 141 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 142 PetscTruth roworiented = aij->roworiented; 143 144 /* Some Variables required in the macro */ 145 Mat A = aij->A; 146 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 147 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 148 PetscScalar *aa = a->a; 149 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 150 Mat B = aij->B; 151 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 152 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n; 153 PetscScalar *ba = b->a; 154 155 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 156 PetscInt nonew = a->nonew; 157 PetscScalar *ap1,*ap2; 158 159 PetscFunctionBegin; 160 for (i=0; i<m; i++) { 161 if (im[i] < 0) continue; 162 #if defined(PETSC_USE_DEBUG) 163 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 164 #endif 165 if (im[i] >= rstart && im[i] < rend) { 166 row = im[i] - rstart; 167 lastcol1 = -1; 168 rp1 = aj + ai[row]; 169 ap1 = aa + ai[row]; 170 rmax1 = aimax[row]; 171 nrow1 = ailen[row]; 172 low1 = 0; 173 high1 = nrow1; 174 lastcol2 = -1; 175 rp2 = bj + bi[row]; 176 ap2 = ba + bi[row]; 177 rmax2 = bimax[row]; 178 nrow2 = bilen[row]; 179 low2 = 0; 180 high2 = nrow2; 181 182 for (j=0; j<n; j++) { 183 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 184 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 185 if (in[j] >= cstart && in[j] < cend){ 186 col = in[j] - cstart; 187 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 188 } else if (in[j] < 0) continue; 189 #if defined(PETSC_USE_DEBUG) 190 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);} 191 #endif 192 else { 193 if (mat->was_assembled) { 194 if (!aij->colmap) { 195 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 196 } 197 #if defined (PETSC_USE_CTABLE) 198 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 199 col--; 200 #else 201 col = aij->colmap[in[j]] - 1; 202 #endif 203 if (col < 0 && 
!((Mat_SeqAIJ*)(aij->A->data))->nonew) { 204 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 205 col = in[j]; 206 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 207 B = aij->B; 208 b = (Mat_SeqAIJ*)B->data; 209 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 210 rp2 = bj + bi[row]; 211 ap2 = ba + bi[row]; 212 rmax2 = bimax[row]; 213 nrow2 = bilen[row]; 214 low2 = 0; 215 high2 = nrow2; 216 bm = aij->B->rmap.n; 217 ba = b->a; 218 } 219 } else col = in[j]; 220 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 221 } 222 } 223 } else { 224 if (!aij->donotstash) { 225 if (roworiented) { 226 if (ignorezeroentries && v[i*n] == 0.0) continue; 227 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 228 } else { 229 if (ignorezeroentries && v[i] == 0.0) continue; 230 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 231 } 232 } 233 } 234 } 235 PetscFunctionReturn(0); 236 } 237 238 239 #undef __FUNCT__ 240 #define __FUNCT__ "MatGetValues_MPIAIJ" 241 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 242 { 243 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 244 PetscErrorCode ierr; 245 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 246 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 247 248 PetscFunctionBegin; 249 for (i=0; i<m; i++) { 250 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 251 if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1); 252 if (idxm[i] >= rstart && idxm[i] < rend) { 253 row = idxm[i] - rstart; 254 for (j=0; j<n; j++) { 255 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 256 if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1); 257 if (idxn[j] >= cstart && idxn[j] < cend){ 258 col = idxn[j] - cstart; 259 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 260 } else { 261 if (!aij->colmap) { 262 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 263 } 264 #if defined (PETSC_USE_CTABLE) 265 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 266 col --; 267 #else 268 col = aij->colmap[idxn[j]] - 1; 269 #endif 270 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 271 else { 272 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 273 } 274 } 275 } 276 } else { 277 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 278 } 279 } 280 PetscFunctionReturn(0); 281 } 282 283 #undef __FUNCT__ 284 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 285 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 286 { 287 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 288 PetscErrorCode ierr; 289 PetscInt nstash,reallocs; 290 InsertMode addv; 291 292 PetscFunctionBegin; 293 if (aij->donotstash) { 294 PetscFunctionReturn(0); 295 } 296 297 /* make sure all processors are either in INSERTMODE or ADDMODE */ 298 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 299 if (addv == (ADD_VALUES|INSERT_VALUES)) { 300 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 301 } 302 mat->insertmode = addv; /* in case this processor had no cache */ 303 304 ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr); 305 ierr = 
MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ *)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col,other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
  ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap.range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       
nmax,*svalues,*starts,*owner,nrecvs; 399 PetscInt *rvalues,count,base,slen,*source; 400 PetscInt *lens,*lrows,*values,rstart=A->rmap.rstart; 401 MPI_Comm comm = A->comm; 402 MPI_Request *send_waits,*recv_waits; 403 MPI_Status recv_status,*send_status; 404 #if defined(PETSC_DEBUG) 405 PetscTruth found = PETSC_FALSE; 406 #endif 407 408 PetscFunctionBegin; 409 /* first count number of contributors to each processor */ 410 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 411 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 412 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 413 j = 0; 414 for (i=0; i<N; i++) { 415 if (lastidx > (idx = rows[i])) j = 0; 416 lastidx = idx; 417 for (; j<size; j++) { 418 if (idx >= owners[j] && idx < owners[j+1]) { 419 nprocs[2*j]++; 420 nprocs[2*j+1] = 1; 421 owner[i] = j; 422 #if defined(PETSC_DEBUG) 423 found = PETSC_TRUE; 424 #endif 425 break; 426 } 427 } 428 #if defined(PETSC_DEBUG) 429 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 430 found = PETSC_FALSE; 431 #endif 432 } 433 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 434 435 /* inform other processors of number of messages and max length*/ 436 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 437 438 /* post receives: */ 439 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 440 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 441 for (i=0; i<nrecvs; i++) { 442 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 443 } 444 445 /* do sends: 446 1) starts[i] gives the starting index in svalues for stuff going to 447 the ith processor 448 */ 449 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 450 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 451 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 452 starts[0] = 0; 453 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 454 for (i=0; i<N; i++) { 455 svalues[starts[owner[i]]++] = rows[i]; 456 } 457 458 starts[0] = 0; 459 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 460 count = 0; 461 for (i=0; i<size; i++) { 462 if (nprocs[2*i+1]) { 463 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 464 } 465 } 466 ierr = PetscFree(starts);CHKERRQ(ierr); 467 468 base = owners[rank]; 469 470 /* wait on receives */ 471 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 472 source = lens + nrecvs; 473 count = nrecvs; slen = 0; 474 while (count) { 475 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 476 /* unpack receives into our local space */ 477 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 478 source[imdex] = recv_status.MPI_SOURCE; 479 lens[imdex] = n; 480 slen += n; 481 count--; 482 } 483 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 484 485 /* move the data into the send scatter */ 486 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 487 count = 0; 488 for (i=0; i<nrecvs; i++) { 489 values = rvalues + i*nmax; 490 for (j=0; j<lens[i]; j++) { 491 lrows[count++] = values[j] - base; 492 } 493 } 494 ierr = PetscFree(rvalues);CHKERRQ(ierr); 495 ierr = PetscFree(lens);CHKERRQ(ierr); 496 ierr = PetscFree(owner);CHKERRQ(ierr); 497 ierr = PetscFree(nprocs);CHKERRQ(ierr); 498 499 /* actually zap the local rows */ 500 /* 501 Zero 
the required rows. If the "diagonal block" of the matrix 502 is square and the user wishes to set the diagonal we use separate 503 code so that MatSetValues() is not called for each diagonal allocating 504 new memory, thus calling lots of mallocs and slowing things down. 505 506 Contributed by: Matthew Knepley 507 */ 508 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 509 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 510 if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) { 511 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 512 } else if (diag != 0.0) { 513 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 514 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 515 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 516 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 517 } 518 for (i = 0; i < slen; i++) { 519 row = lrows[i] + rstart; 520 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 521 } 522 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 523 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 524 } else { 525 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 526 } 527 ierr = PetscFree(lrows);CHKERRQ(ierr); 528 529 /* wait on sends */ 530 if (nsends) { 531 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 532 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 533 ierr = PetscFree(send_status);CHKERRQ(ierr); 534 } 535 ierr = PetscFree(send_waits);CHKERRQ(ierr); 536 ierr = PetscFree(svalues);CHKERRQ(ierr); 537 538 PetscFunctionReturn(0); 539 } 540 541 #undef __FUNCT__ 542 #define __FUNCT__ "MatMult_MPIAIJ" 543 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 544 { 545 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 546 PetscErrorCode ierr; 547 PetscInt nt; 548 549 PetscFunctionBegin; 550 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 551 if (nt != A->cmap.n) { 552 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt); 553 } 554 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 555 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 556 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 557 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 558 PetscFunctionReturn(0); 559 } 560 561 #undef __FUNCT__ 562 #define __FUNCT__ "MatMultAdd_MPIAIJ" 563 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 564 { 565 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 566 PetscErrorCode ierr; 567 568 PetscFunctionBegin; 569 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 570 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 571 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 572 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 573 PetscFunctionReturn(0); 574 } 575 576 #undef __FUNCT__ 577 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 578 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 579 { 580 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 581 PetscErrorCode ierr; 582 PetscTruth merged; 583 584 PetscFunctionBegin; 585 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 586 /* do nondiagonal part */ 587 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 588 if (!merged) { 589 /* send it on its way */ 590 ierr = 
VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 591 /* do local part */ 592 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 593 /* receive remote parts: note this assumes the values are not actually */ 594 /* added in yy until the next line, */ 595 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 596 } else { 597 /* do local part */ 598 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 599 /* send it on its way */ 600 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 601 /* values actually were received in the Begin() but we need to call this nop */ 602 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 603 } 604 PetscFunctionReturn(0); 605 } 606 607 EXTERN_C_BEGIN 608 #undef __FUNCT__ 609 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 610 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 611 { 612 MPI_Comm comm; 613 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 614 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 615 IS Me,Notme; 616 PetscErrorCode ierr; 617 PetscInt M,N,first,last,*notme,i; 618 PetscMPIInt size; 619 620 PetscFunctionBegin; 621 622 /* Easy test: symmetric diagonal block */ 623 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 624 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 625 if (!*f) PetscFunctionReturn(0); 626 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 627 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 628 if (size == 1) PetscFunctionReturn(0); 629 630 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 631 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 632 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 633 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 634 for (i=0; i<first; i++) notme[i] = i; 635 for (i=last; i<M; i++) notme[i-last+first] = i; 636 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 637 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 638 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 639 Aoff = Aoffs[0]; 640 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 641 Boff = Boffs[0]; 642 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 643 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 644 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 645 ierr = ISDestroy(Me);CHKERRQ(ierr); 646 ierr = ISDestroy(Notme);CHKERRQ(ierr); 647 648 PetscFunctionReturn(0); 649 } 650 EXTERN_C_END 651 652 #undef __FUNCT__ 653 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 654 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 655 { 656 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 657 PetscErrorCode ierr; 658 659 PetscFunctionBegin; 660 /* do nondiagonal part */ 661 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 662 /* send it on its way */ 663 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 664 /* do local part */ 665 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 666 /* receive remote parts */ 667 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 668 PetscFunctionReturn(0); 669 } 670 671 /* 672 This only works correctly for square matrices where the subblock A->A is the 673 diagonal block 674 */ 675 #undef __FUNCT__ 676 #define 
__FUNCT__ "MatGetDiagonal_MPIAIJ" 677 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 678 { 679 PetscErrorCode ierr; 680 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 681 682 PetscFunctionBegin; 683 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 684 if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) { 685 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 686 } 687 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 688 PetscFunctionReturn(0); 689 } 690 691 #undef __FUNCT__ 692 #define __FUNCT__ "MatScale_MPIAIJ" 693 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 694 { 695 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 696 PetscErrorCode ierr; 697 698 PetscFunctionBegin; 699 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 700 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 701 PetscFunctionReturn(0); 702 } 703 704 #undef __FUNCT__ 705 #define __FUNCT__ "MatDestroy_MPIAIJ" 706 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 707 { 708 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 709 PetscErrorCode ierr; 710 711 PetscFunctionBegin; 712 #if defined(PETSC_USE_LOG) 713 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N); 714 #endif 715 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 716 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 717 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 718 #if defined (PETSC_USE_CTABLE) 719 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 720 #else 721 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 722 #endif 723 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 724 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 725 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 726 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 727 ierr = PetscFree(aij);CHKERRQ(ierr); 728 729 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 730 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 731 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 732 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 733 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 734 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 735 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 736 PetscFunctionReturn(0); 737 } 738 739 #undef __FUNCT__ 740 #define __FUNCT__ "MatView_MPIAIJ_Binary" 741 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 742 { 743 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 744 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 745 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 746 PetscErrorCode ierr; 747 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 748 int fd; 749 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 750 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz; 751 PetscScalar *column_values; 752 753 PetscFunctionBegin; 754 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 755 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 756 nz = A->nz + B->nz; 757 if (!rank) { 758 header[0] = MAT_FILE_COOKIE; 759 header[1] = mat->rmap.N; 760 header[2] = mat->cmap.N; 761 ierr = 
MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap.n;
    range = mat->rmap.range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->rmap.n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap.n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap.n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap.n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = 
PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 851 for (i=1; i<size; i++) { 852 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 853 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 854 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 855 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 856 } 857 } else { 858 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 859 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 860 } 861 ierr = PetscFree(column_values);CHKERRQ(ierr); 862 PetscFunctionReturn(0); 863 } 864 865 #undef __FUNCT__ 866 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 867 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 868 { 869 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 870 PetscErrorCode ierr; 871 PetscMPIInt rank = aij->rank,size = aij->size; 872 PetscTruth isdraw,iascii,isbinary; 873 PetscViewer sviewer; 874 PetscViewerFormat format; 875 876 PetscFunctionBegin; 877 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 878 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 879 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 880 if (iascii) { 881 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 882 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 883 MatInfo info; 884 PetscTruth inodes; 885 886 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 887 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 888 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 889 if (!inodes) { 890 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 891 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 892 } else { 893 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 894 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 895 } 896 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 897 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 898 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 899 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 900 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 901 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 902 PetscFunctionReturn(0); 903 } else if (format == PETSC_VIEWER_ASCII_INFO) { 904 PetscInt inodecount,inodelimit,*inodes; 905 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 906 if (inodes) { 907 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 908 } else { 909 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 910 } 911 PetscFunctionReturn(0); 912 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 913 PetscFunctionReturn(0); 914 } 915 } else if (isbinary) { 916 if (size == 1) { 917 ierr = 
PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 918 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 919 } else { 920 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 921 } 922 PetscFunctionReturn(0); 923 } else if (isdraw) { 924 PetscDraw draw; 925 PetscTruth isnull; 926 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 927 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 928 } 929 930 if (size == 1) { 931 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 932 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 933 } else { 934 /* assemble the entire matrix onto first processor. */ 935 Mat A; 936 Mat_SeqAIJ *Aloc; 937 PetscInt M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct; 938 PetscScalar *a; 939 940 ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 941 if (!rank) { 942 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 943 } else { 944 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 945 } 946 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 947 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 948 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 949 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 950 951 /* copy over the A part */ 952 Aloc = (Mat_SeqAIJ*)aij->A->data; 953 m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 954 row = mat->rmap.rstart; 955 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;} 956 for (i=0; i<m; i++) { 957 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 958 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 959 } 960 aj = Aloc->j; 961 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;} 962 963 /* copy over the B part */ 964 Aloc = (Mat_SeqAIJ*)aij->B->data; 965 m = aij->B->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 966 row = mat->rmap.rstart; 967 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 968 ct = cols; 969 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 970 for (i=0; i<m; i++) { 971 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 972 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 973 } 974 ierr = PetscFree(ct);CHKERRQ(ierr); 975 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 976 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 977 /* 978 Everyone has to call to draw the matrix since the graphics waits are 979 synchronized across all processors that share the PetscDraw object 980 */ 981 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 982 if (!rank) { 983 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 984 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 985 } 986 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 987 ierr = MatDestroy(A);CHKERRQ(ierr); 988 } 989 PetscFunctionReturn(0); 990 } 991 992 #undef __FUNCT__ 993 #define __FUNCT__ "MatView_MPIAIJ" 994 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 995 { 996 PetscErrorCode ierr; 997 PetscTruth iascii,isdraw,issocket,isbinary; 998 999 PetscFunctionBegin; 1000 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1001 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1002 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1003 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 
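/* Descriptive note on the dispatch that follows: ASCII, draw, binary and socket
   viewers are all routed through MatView_MPIAIJ_ASCIIorDraworSocket(); any other
   viewer type is rejected with a PETSC_ERR_SUP error. */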
1004 if (iascii || isdraw || isbinary || issocket) { 1005 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1006 } else { 1007 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1008 } 1009 PetscFunctionReturn(0); 1010 } 1011 1012 1013 1014 #undef __FUNCT__ 1015 #define __FUNCT__ "MatRelax_MPIAIJ" 1016 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1017 { 1018 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1019 PetscErrorCode ierr; 1020 Vec bb1; 1021 1022 PetscFunctionBegin; 1023 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1024 1025 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1026 1027 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1028 if (flag & SOR_ZERO_INITIAL_GUESS) { 1029 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1030 its--; 1031 } 1032 1033 while (its--) { 1034 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1035 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1036 1037 /* update rhs: bb1 = bb - B*x */ 1038 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1039 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1040 1041 /* local sweep */ 1042 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1043 CHKERRQ(ierr); 1044 } 1045 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1046 if (flag & SOR_ZERO_INITIAL_GUESS) { 1047 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1048 its--; 1049 } 1050 while (its--) { 1051 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1052 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1053 1054 /* update rhs: bb1 = bb - B*x */ 1055 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1056 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1057 1058 /* local sweep */ 1059 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1060 CHKERRQ(ierr); 1061 } 1062 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1063 if (flag & SOR_ZERO_INITIAL_GUESS) { 1064 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1065 its--; 1066 } 1067 while (its--) { 1068 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1069 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1070 1071 /* update rhs: bb1 = bb - B*x */ 1072 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1073 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1074 1075 /* local sweep */ 1076 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1077 CHKERRQ(ierr); 1078 } 1079 } else { 1080 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1081 } 1082 1083 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1084 PetscFunctionReturn(0); 1085 } 1086 1087 #undef __FUNCT__ 1088 #define __FUNCT__ "MatPermute_MPIAIJ" 1089 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1090 { 1091 MPI_Comm comm,pcomm; 1092 PetscInt first,local_size,nrows,*rows; 1093 int ntids; 1094 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1095 PetscErrorCode 
ierr; 1096 1097 PetscFunctionBegin; 1098 ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr); 1099 /* make a collective version of 'rowp' */ 1100 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr); 1101 if (pcomm==comm) { 1102 crowp = rowp; 1103 } else { 1104 ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr); 1105 ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr); 1106 ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr); 1107 ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr); 1108 } 1109 /* collect the global row permutation and invert it */ 1110 ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr); 1111 ierr = ISSetPermutation(growp); CHKERRQ(ierr); 1112 if (pcomm!=comm) { 1113 ierr = ISDestroy(crowp); CHKERRQ(ierr); 1114 } 1115 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1116 /* get the local target indices */ 1117 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr); 1118 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr); 1119 ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr); 1120 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr); 1121 ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr); 1122 ierr = ISDestroy(irowp); CHKERRQ(ierr); 1123 /* the column permutation is so much easier; 1124 make a local version of 'colp' and invert it */ 1125 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr); 1126 ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr); 1127 if (ntids==1) { 1128 lcolp = colp; 1129 } else { 1130 ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr); 1131 ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr); 1132 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr); 1133 } 1134 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr); 1135 ierr = ISSetPermutation(lcolp); CHKERRQ(ierr); 1136 if (ntids>1) { 1137 ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr); 1138 ierr = ISDestroy(lcolp); CHKERRQ(ierr); 1139 } 1140 /* now we just get the submatrix */ 1141 ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr); 1142 /* clean up */ 1143 ierr = ISDestroy(lrowp); CHKERRQ(ierr); 1144 ierr = ISDestroy(icolp); CHKERRQ(ierr); 1145 PetscFunctionReturn(0); 1146 } 1147 1148 #undef __FUNCT__ 1149 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1150 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1151 { 1152 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1153 Mat A = mat->A,B = mat->B; 1154 PetscErrorCode ierr; 1155 PetscReal isend[5],irecv[5]; 1156 1157 PetscFunctionBegin; 1158 info->block_size = 1.0; 1159 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1160 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1161 isend[3] = info->memory; isend[4] = info->mallocs; 1162 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1163 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1164 isend[3] += info->memory; isend[4] += info->mallocs; 1165 if (flag == MAT_LOCAL) { 1166 info->nz_used = isend[0]; 1167 info->nz_allocated = isend[1]; 1168 info->nz_unneeded = isend[2]; 1169 info->memory = isend[3]; 1170 info->mallocs = isend[4]; 1171 } else if (flag == MAT_GLOBAL_MAX) { 1172 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1173 info->nz_used = irecv[0]; 1174 info->nz_allocated = irecv[1]; 1175 info->nz_unneeded = irecv[2]; 1176 info->memory = irecv[3]; 1177 info->mallocs = irecv[4]; 1178 } else if (flag == 
MAT_GLOBAL_SUM) { 1179 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1180 info->nz_used = irecv[0]; 1181 info->nz_allocated = irecv[1]; 1182 info->nz_unneeded = irecv[2]; 1183 info->memory = irecv[3]; 1184 info->mallocs = irecv[4]; 1185 } 1186 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1187 info->fill_ratio_needed = 0; 1188 info->factor_mallocs = 0; 1189 info->rows_global = (double)matin->rmap.N; 1190 info->columns_global = (double)matin->cmap.N; 1191 info->rows_local = (double)matin->rmap.n; 1192 info->columns_local = (double)matin->cmap.N; 1193 1194 PetscFunctionReturn(0); 1195 } 1196 1197 #undef __FUNCT__ 1198 #define __FUNCT__ "MatSetOption_MPIAIJ" 1199 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op) 1200 { 1201 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1202 PetscErrorCode ierr; 1203 1204 PetscFunctionBegin; 1205 switch (op) { 1206 case MAT_NO_NEW_NONZERO_LOCATIONS: 1207 case MAT_YES_NEW_NONZERO_LOCATIONS: 1208 case MAT_COLUMNS_UNSORTED: 1209 case MAT_COLUMNS_SORTED: 1210 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1211 case MAT_KEEP_ZEROED_ROWS: 1212 case MAT_NEW_NONZERO_LOCATION_ERR: 1213 case MAT_USE_INODES: 1214 case MAT_DO_NOT_USE_INODES: 1215 case MAT_IGNORE_ZERO_ENTRIES: 1216 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1217 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1218 break; 1219 case MAT_ROW_ORIENTED: 1220 a->roworiented = PETSC_TRUE; 1221 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1222 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1223 break; 1224 case MAT_ROWS_SORTED: 1225 case MAT_ROWS_UNSORTED: 1226 case MAT_YES_NEW_DIAGONALS: 1227 ierr = PetscInfo(A,"Option ignored\n");CHKERRQ(ierr); 1228 break; 1229 case MAT_COLUMN_ORIENTED: 1230 a->roworiented = PETSC_FALSE; 1231 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1232 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1233 break; 1234 case MAT_IGNORE_OFF_PROC_ENTRIES: 1235 a->donotstash = PETSC_TRUE; 1236 break; 1237 case MAT_NO_NEW_DIAGONALS: 1238 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1239 case MAT_SYMMETRIC: 1240 case MAT_STRUCTURALLY_SYMMETRIC: 1241 case MAT_HERMITIAN: 1242 case MAT_SYMMETRY_ETERNAL: 1243 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1244 break; 1245 case MAT_NOT_SYMMETRIC: 1246 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1247 case MAT_NOT_HERMITIAN: 1248 case MAT_NOT_SYMMETRY_ETERNAL: 1249 break; 1250 default: 1251 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1252 } 1253 PetscFunctionReturn(0); 1254 } 1255 1256 #undef __FUNCT__ 1257 #define __FUNCT__ "MatGetRow_MPIAIJ" 1258 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1259 { 1260 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1261 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1262 PetscErrorCode ierr; 1263 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart; 1264 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend; 1265 PetscInt *cmap,*idx_p; 1266 1267 PetscFunctionBegin; 1268 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1269 mat->getrowactive = PETSC_TRUE; 1270 1271 if (!mat->rowvalues && (idx || v)) { 1272 /* 1273 allocate enough space to hold information from the longest row. 
1274 */ 1275 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1276 PetscInt max = 1,tmp; 1277 for (i=0; i<matin->rmap.n; i++) { 1278 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1279 if (max < tmp) { max = tmp; } 1280 } 1281 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1282 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1283 } 1284 1285 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1286 lrow = row - rstart; 1287 1288 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1289 if (!v) {pvA = 0; pvB = 0;} 1290 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1291 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1292 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1293 nztot = nzA + nzB; 1294 1295 cmap = mat->garray; 1296 if (v || idx) { 1297 if (nztot) { 1298 /* Sort by increasing column numbers, assuming A and B already sorted */ 1299 PetscInt imark = -1; 1300 if (v) { 1301 *v = v_p = mat->rowvalues; 1302 for (i=0; i<nzB; i++) { 1303 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1304 else break; 1305 } 1306 imark = i; 1307 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1308 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1309 } 1310 if (idx) { 1311 *idx = idx_p = mat->rowindices; 1312 if (imark > -1) { 1313 for (i=0; i<imark; i++) { 1314 idx_p[i] = cmap[cworkB[i]]; 1315 } 1316 } else { 1317 for (i=0; i<nzB; i++) { 1318 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1319 else break; 1320 } 1321 imark = i; 1322 } 1323 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1324 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1325 } 1326 } else { 1327 if (idx) *idx = 0; 1328 if (v) *v = 0; 1329 } 1330 } 1331 *nz = nztot; 1332 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1333 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1334 PetscFunctionReturn(0); 1335 } 1336 1337 #undef __FUNCT__ 1338 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1339 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1340 { 1341 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1342 1343 PetscFunctionBegin; 1344 if (!aij->getrowactive) { 1345 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1346 } 1347 aij->getrowactive = PETSC_FALSE; 1348 PetscFunctionReturn(0); 1349 } 1350 1351 #undef __FUNCT__ 1352 #define __FUNCT__ "MatNorm_MPIAIJ" 1353 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1354 { 1355 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1356 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1357 PetscErrorCode ierr; 1358 PetscInt i,j,cstart = mat->cmap.rstart; 1359 PetscReal sum = 0.0; 1360 PetscScalar *v; 1361 1362 PetscFunctionBegin; 1363 if (aij->size == 1) { 1364 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1365 } else { 1366 if (type == NORM_FROBENIUS) { 1367 v = amat->a; 1368 for (i=0; i<amat->nz; i++) { 1369 #if defined(PETSC_USE_COMPLEX) 1370 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1371 #else 1372 sum += (*v)*(*v); v++; 1373 #endif 1374 } 1375 v = bmat->a; 1376 for (i=0; i<bmat->nz; i++) { 1377 #if defined(PETSC_USE_COMPLEX) 1378 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1379 #else 1380 sum += (*v)*(*v); v++; 1381 #endif 1382 } 1383 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1384 *norm = sqrt(*norm); 
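      /* The Frobenius norm above is accumulated from the squared entries of both the
         diagonal block (A) and the off-diagonal block (B), summed across all processes
         with MPI_Allreduce, and then square-rooted. */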
1385 } else if (type == NORM_1) { /* max column norm */ 1386 PetscReal *tmp,*tmp2; 1387 PetscInt *jj,*garray = aij->garray; 1388 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1389 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1390 ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 1391 *norm = 0.0; 1392 v = amat->a; jj = amat->j; 1393 for (j=0; j<amat->nz; j++) { 1394 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1395 } 1396 v = bmat->a; jj = bmat->j; 1397 for (j=0; j<bmat->nz; j++) { 1398 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1399 } 1400 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1401 for (j=0; j<mat->cmap.N; j++) { 1402 if (tmp2[j] > *norm) *norm = tmp2[j]; 1403 } 1404 ierr = PetscFree(tmp);CHKERRQ(ierr); 1405 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1406 } else if (type == NORM_INFINITY) { /* max row norm */ 1407 PetscReal ntemp = 0.0; 1408 for (j=0; j<aij->A->rmap.n; j++) { 1409 v = amat->a + amat->i[j]; 1410 sum = 0.0; 1411 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1412 sum += PetscAbsScalar(*v); v++; 1413 } 1414 v = bmat->a + bmat->i[j]; 1415 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1416 sum += PetscAbsScalar(*v); v++; 1417 } 1418 if (sum > ntemp) ntemp = sum; 1419 } 1420 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1421 } else { 1422 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1423 } 1424 } 1425 PetscFunctionReturn(0); 1426 } 1427 1428 #undef __FUNCT__ 1429 #define __FUNCT__ "MatTranspose_MPIAIJ" 1430 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1431 { 1432 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1433 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1434 PetscErrorCode ierr; 1435 PetscInt M = A->rmap.N,N = A->cmap.N,m,*ai,*aj,row,*cols,i,*ct; 1436 Mat B; 1437 PetscScalar *array; 1438 1439 PetscFunctionBegin; 1440 if (!matout && M != N) { 1441 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1442 } 1443 1444 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 1445 ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr); 1446 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1447 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1448 1449 /* copy over the A part */ 1450 Aloc = (Mat_SeqAIJ*)a->A->data; 1451 m = a->A->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1452 row = A->rmap.rstart; 1453 for (i=0; i<ai[m]; i++) {aj[i] += A->cmap.rstart ;} 1454 for (i=0; i<m; i++) { 1455 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1456 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1457 } 1458 aj = Aloc->j; 1459 for (i=0; i<ai[m]; i++) {aj[i] -= A->cmap.rstart ;} 1460 1461 /* copy over the B part */ 1462 Aloc = (Mat_SeqAIJ*)a->B->data; 1463 m = a->B->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1464 row = A->rmap.rstart; 1465 ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1466 ct = cols; 1467 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1468 for (i=0; i<m; i++) { 1469 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1470 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1471 } 1472 ierr = PetscFree(ct);CHKERRQ(ierr); 1473 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1474 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1475 if (matout) { 1476 *matout = B; 1477 } else { 1478 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1479 } 1480 
PetscFunctionReturn(0); 1481 } 1482 1483 #undef __FUNCT__ 1484 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1485 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1486 { 1487 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1488 Mat a = aij->A,b = aij->B; 1489 PetscErrorCode ierr; 1490 PetscInt s1,s2,s3; 1491 1492 PetscFunctionBegin; 1493 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1494 if (rr) { 1495 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1496 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1497 /* Overlap communication with computation. */ 1498 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1499 } 1500 if (ll) { 1501 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1502 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1503 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1504 } 1505 /* scale the diagonal block */ 1506 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1507 1508 if (rr) { 1509 /* Do a scatter end and then right scale the off-diagonal block */ 1510 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1511 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1512 } 1513 1514 PetscFunctionReturn(0); 1515 } 1516 1517 1518 #undef __FUNCT__ 1519 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1520 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A) 1521 { 1522 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1523 PetscErrorCode ierr; 1524 1525 PetscFunctionBegin; 1526 if (!a->rank) { 1527 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1528 } 1529 PetscFunctionReturn(0); 1530 } 1531 1532 #undef __FUNCT__ 1533 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1534 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1535 { 1536 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1537 PetscErrorCode ierr; 1538 1539 PetscFunctionBegin; 1540 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1541 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1542 PetscFunctionReturn(0); 1543 } 1544 #undef __FUNCT__ 1545 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1546 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1547 { 1548 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1549 PetscErrorCode ierr; 1550 1551 PetscFunctionBegin; 1552 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1553 PetscFunctionReturn(0); 1554 } 1555 1556 #undef __FUNCT__ 1557 #define __FUNCT__ "MatEqual_MPIAIJ" 1558 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1559 { 1560 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1561 Mat a,b,c,d; 1562 PetscTruth flg; 1563 PetscErrorCode ierr; 1564 1565 PetscFunctionBegin; 1566 a = matA->A; b = matA->B; 1567 c = matB->A; d = matB->B; 1568 1569 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1570 if (flg) { 1571 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1572 } 1573 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1574 PetscFunctionReturn(0); 1575 } 1576 1577 #undef __FUNCT__ 1578 #define __FUNCT__ "MatCopy_MPIAIJ" 1579 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1580 { 1581 PetscErrorCode ierr; 1582 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1583 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1584 1585 PetscFunctionBegin; 1586 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 1587 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1588 /* because of the column compression in the off-processor part of the matrix a->B, 1589 the number of columns in a->B and b->B may be different, hence we cannot call 1590 the MatCopy() directly on the two parts. If need be, we can provide a more 1591 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1592 then copying the submatrices */ 1593 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1594 } else { 1595 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1596 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1597 } 1598 PetscFunctionReturn(0); 1599 } 1600 1601 #undef __FUNCT__ 1602 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1603 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1604 { 1605 PetscErrorCode ierr; 1606 1607 PetscFunctionBegin; 1608 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1609 PetscFunctionReturn(0); 1610 } 1611 1612 #include "petscblaslapack.h" 1613 #undef __FUNCT__ 1614 #define __FUNCT__ "MatAXPY_MPIAIJ" 1615 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1616 { 1617 PetscErrorCode ierr; 1618 PetscInt i; 1619 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1620 PetscBLASInt bnz,one=1; 1621 Mat_SeqAIJ *x,*y; 1622 1623 PetscFunctionBegin; 1624 if (str == SAME_NONZERO_PATTERN) { 1625 PetscScalar alpha = a; 1626 x = (Mat_SeqAIJ *)xx->A->data; 1627 y = (Mat_SeqAIJ *)yy->A->data; 1628 bnz = (PetscBLASInt)x->nz; 1629 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1630 x = (Mat_SeqAIJ *)xx->B->data; 1631 y = (Mat_SeqAIJ *)yy->B->data; 1632 bnz = (PetscBLASInt)x->nz; 1633 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1634 } else if (str == SUBSET_NONZERO_PATTERN) { 1635 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1636 1637 x = (Mat_SeqAIJ *)xx->B->data; 1638 y = (Mat_SeqAIJ *)yy->B->data; 1639 if (y->xtoy && y->XtoY != xx->B) { 1640 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1641 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1642 } 1643 if (!y->xtoy) { /* get xtoy */ 1644 ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1645 y->XtoY = xx->B; 1646 } 1647 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1648 } else { 1649 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1650 } 1651 PetscFunctionReturn(0); 1652 } 1653 1654 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1655 1656 #undef __FUNCT__ 1657 #define __FUNCT__ "MatConjugate_MPIAIJ" 1658 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1659 { 1660 #if defined(PETSC_USE_COMPLEX) 1661 PetscErrorCode ierr; 1662 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1663 1664 PetscFunctionBegin; 1665 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1666 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1667 #else 1668 PetscFunctionBegin; 1669 #endif 1670 PetscFunctionReturn(0); 1671 } 1672 1673 #undef __FUNCT__ 1674 #define __FUNCT__ "MatRealPart_MPIAIJ" 1675 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1676 { 1677 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1678 PetscErrorCode ierr; 1679 1680 PetscFunctionBegin; 1681 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1682 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1683 PetscFunctionReturn(0); 1684 } 1685 1686 #undef __FUNCT__ 1687 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1688 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1689 { 1690 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1691 PetscErrorCode ierr; 1692 1693 
PetscFunctionBegin; 1694 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1695 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1696 PetscFunctionReturn(0); 1697 } 1698 1699 /* -------------------------------------------------------------------*/ 1700 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1701 MatGetRow_MPIAIJ, 1702 MatRestoreRow_MPIAIJ, 1703 MatMult_MPIAIJ, 1704 /* 4*/ MatMultAdd_MPIAIJ, 1705 MatMultTranspose_MPIAIJ, 1706 MatMultTransposeAdd_MPIAIJ, 1707 0, 1708 0, 1709 0, 1710 /*10*/ 0, 1711 0, 1712 0, 1713 MatRelax_MPIAIJ, 1714 MatTranspose_MPIAIJ, 1715 /*15*/ MatGetInfo_MPIAIJ, 1716 MatEqual_MPIAIJ, 1717 MatGetDiagonal_MPIAIJ, 1718 MatDiagonalScale_MPIAIJ, 1719 MatNorm_MPIAIJ, 1720 /*20*/ MatAssemblyBegin_MPIAIJ, 1721 MatAssemblyEnd_MPIAIJ, 1722 0, 1723 MatSetOption_MPIAIJ, 1724 MatZeroEntries_MPIAIJ, 1725 /*25*/ MatZeroRows_MPIAIJ, 1726 0, 1727 0, 1728 0, 1729 0, 1730 /*30*/ MatSetUpPreallocation_MPIAIJ, 1731 0, 1732 0, 1733 0, 1734 0, 1735 /*35*/ MatDuplicate_MPIAIJ, 1736 0, 1737 0, 1738 0, 1739 0, 1740 /*40*/ MatAXPY_MPIAIJ, 1741 MatGetSubMatrices_MPIAIJ, 1742 MatIncreaseOverlap_MPIAIJ, 1743 MatGetValues_MPIAIJ, 1744 MatCopy_MPIAIJ, 1745 /*45*/ MatPrintHelp_MPIAIJ, 1746 MatScale_MPIAIJ, 1747 0, 1748 0, 1749 0, 1750 /*50*/ MatSetBlockSize_MPIAIJ, 1751 0, 1752 0, 1753 0, 1754 0, 1755 /*55*/ MatFDColoringCreate_MPIAIJ, 1756 0, 1757 MatSetUnfactored_MPIAIJ, 1758 MatPermute_MPIAIJ, 1759 0, 1760 /*60*/ MatGetSubMatrix_MPIAIJ, 1761 MatDestroy_MPIAIJ, 1762 MatView_MPIAIJ, 1763 0, 1764 0, 1765 /*65*/ 0, 1766 0, 1767 0, 1768 0, 1769 0, 1770 /*70*/ 0, 1771 0, 1772 MatSetColoring_MPIAIJ, 1773 #if defined(PETSC_HAVE_ADIC) 1774 MatSetValuesAdic_MPIAIJ, 1775 #else 1776 0, 1777 #endif 1778 MatSetValuesAdifor_MPIAIJ, 1779 /*75*/ 0, 1780 0, 1781 0, 1782 0, 1783 0, 1784 /*80*/ 0, 1785 0, 1786 0, 1787 0, 1788 /*84*/ MatLoad_MPIAIJ, 1789 0, 1790 0, 1791 0, 1792 0, 1793 0, 1794 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1795 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1796 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1797 MatPtAP_Basic, 1798 MatPtAPSymbolic_MPIAIJ, 1799 /*95*/ MatPtAPNumeric_MPIAIJ, 1800 0, 1801 0, 1802 0, 1803 0, 1804 /*100*/0, 1805 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1806 MatPtAPNumeric_MPIAIJ_MPIAIJ, 1807 MatConjugate_MPIAIJ, 1808 0, 1809 /*105*/MatSetValuesRow_MPIAIJ, 1810 MatRealPart_MPIAIJ, 1811 MatImaginaryPart_MPIAIJ}; 1812 1813 /* ----------------------------------------------------------------------------------------*/ 1814 1815 EXTERN_C_BEGIN 1816 #undef __FUNCT__ 1817 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1818 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 1819 { 1820 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1821 PetscErrorCode ierr; 1822 1823 PetscFunctionBegin; 1824 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1825 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1826 PetscFunctionReturn(0); 1827 } 1828 EXTERN_C_END 1829 1830 EXTERN_C_BEGIN 1831 #undef __FUNCT__ 1832 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1833 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 1834 { 1835 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1836 PetscErrorCode ierr; 1837 1838 PetscFunctionBegin; 1839 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1840 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1841 PetscFunctionReturn(0); 1842 } 1843 EXTERN_C_END 1844 1845 #include "petscpc.h" 1846 EXTERN_C_BEGIN 1847 #undef __FUNCT__ 1848 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1849 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const 
PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1850 { 1851 Mat_MPIAIJ *b; 1852 PetscErrorCode ierr; 1853 PetscInt i; 1854 1855 PetscFunctionBegin; 1856 B->preallocated = PETSC_TRUE; 1857 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1858 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1859 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1860 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1861 1862 B->rmap.bs = B->cmap.bs = 1; 1863 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 1864 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 1865 if (d_nnz) { 1866 for (i=0; i<B->rmap.n; i++) { 1867 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1868 } 1869 } 1870 if (o_nnz) { 1871 for (i=0; i<B->rmap.n; i++) { 1872 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1873 } 1874 } 1875 b = (Mat_MPIAIJ*)B->data; 1876 1877 /* Explicitly create 2 MATSEQAIJ matrices. */ 1878 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1879 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 1880 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 1881 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 1882 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1883 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 1884 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 1885 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 1886 1887 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1888 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1889 1890 PetscFunctionReturn(0); 1891 } 1892 EXTERN_C_END 1893 1894 #undef __FUNCT__ 1895 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1896 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1897 { 1898 Mat mat; 1899 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1900 PetscErrorCode ierr; 1901 1902 PetscFunctionBegin; 1903 *newmat = 0; 1904 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 1905 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 1906 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1907 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1908 a = (Mat_MPIAIJ*)mat->data; 1909 1910 mat->factor = matin->factor; 1911 mat->rmap.bs = matin->rmap.bs; 1912 mat->assembled = PETSC_TRUE; 1913 mat->insertmode = NOT_SET_VALUES; 1914 mat->preallocated = PETSC_TRUE; 1915 1916 a->size = oldmat->size; 1917 a->rank = oldmat->rank; 1918 a->donotstash = oldmat->donotstash; 1919 a->roworiented = oldmat->roworiented; 1920 a->rowindices = 0; 1921 a->rowvalues = 0; 1922 a->getrowactive = PETSC_FALSE; 1923 1924 ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 1925 ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 1926 1927 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1928 if (oldmat->colmap) { 1929 #if defined (PETSC_USE_CTABLE) 1930 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1931 #else 1932 ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1933 ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 1934 ierr = 
PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 1935 #endif 1936 } else a->colmap = 0; 1937 if (oldmat->garray) { 1938 PetscInt len; 1939 len = oldmat->B->cmap.n; 1940 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1941 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 1942 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 1943 } else a->garray = 0; 1944 1945 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1946 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 1947 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1948 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 1949 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1950 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1951 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1952 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 1953 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1954 *newmat = mat; 1955 PetscFunctionReturn(0); 1956 } 1957 1958 #include "petscsys.h" 1959 1960 #undef __FUNCT__ 1961 #define __FUNCT__ "MatLoad_MPIAIJ" 1962 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat) 1963 { 1964 Mat A; 1965 PetscScalar *vals,*svals; 1966 MPI_Comm comm = ((PetscObject)viewer)->comm; 1967 MPI_Status status; 1968 PetscErrorCode ierr; 1969 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 1970 PetscInt i,nz,j,rstart,rend,mmax; 1971 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 1972 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1973 PetscInt cend,cstart,n,*rowners; 1974 int fd; 1975 1976 PetscFunctionBegin; 1977 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1978 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1979 if (!rank) { 1980 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1981 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1982 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1983 } 1984 1985 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1986 M = header[1]; N = header[2]; 1987 /* determine ownership of all rows */ 1988 m = M/size + ((M % size) > rank); 1989 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1990 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1991 1992 /* First process needs enough room for process with most rows */ 1993 if (!rank) { 1994 mmax = rowners[1]; 1995 for (i=2; i<size; i++) { 1996 mmax = PetscMax(mmax,rowners[i]); 1997 } 1998 } else mmax = m; 1999 2000 rowners[0] = 0; 2001 for (i=2; i<=size; i++) { 2002 mmax = PetscMax(mmax,rowners[i]); 2003 rowners[i] += rowners[i-1]; 2004 } 2005 rstart = rowners[rank]; 2006 rend = rowners[rank+1]; 2007 2008 /* distribute row lengths to all processors */ 2009 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2010 if (!rank) { 2011 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2012 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2013 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2014 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2015 for (j=0; j<m; j++) { 2016 procsnz[0] += ourlens[j]; 2017 } 2018 for (i=1; i<size; i++) { 2019 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2020 /* calculate the number of nonzeros on each 
processor */ 2021 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2022 procsnz[i] += rowlengths[j]; 2023 } 2024 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2025 } 2026 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2027 } else { 2028 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2029 } 2030 2031 if (!rank) { 2032 /* determine max buffer needed and allocate it */ 2033 maxnz = 0; 2034 for (i=0; i<size; i++) { 2035 maxnz = PetscMax(maxnz,procsnz[i]); 2036 } 2037 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2038 2039 /* read in my part of the matrix column indices */ 2040 nz = procsnz[0]; 2041 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2042 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2043 2044 /* read in every one elses and ship off */ 2045 for (i=1; i<size; i++) { 2046 nz = procsnz[i]; 2047 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2048 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2049 } 2050 ierr = PetscFree(cols);CHKERRQ(ierr); 2051 } else { 2052 /* determine buffer space needed for message */ 2053 nz = 0; 2054 for (i=0; i<m; i++) { 2055 nz += ourlens[i]; 2056 } 2057 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2058 2059 /* receive message of column indices*/ 2060 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2061 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2062 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2063 } 2064 2065 /* determine column ownership if matrix is not square */ 2066 if (N != M) { 2067 n = N/size + ((N % size) > rank); 2068 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2069 cstart = cend - n; 2070 } else { 2071 cstart = rstart; 2072 cend = rend; 2073 n = cend - cstart; 2074 } 2075 2076 /* loop over local rows, determining number of off diagonal entries */ 2077 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2078 jj = 0; 2079 for (i=0; i<m; i++) { 2080 for (j=0; j<ourlens[i]; j++) { 2081 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2082 jj++; 2083 } 2084 } 2085 2086 /* create our matrix */ 2087 for (i=0; i<m; i++) { 2088 ourlens[i] -= offlens[i]; 2089 } 2090 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2091 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2092 ierr = MatSetType(A,type);CHKERRQ(ierr); 2093 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2094 2095 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2096 for (i=0; i<m; i++) { 2097 ourlens[i] += offlens[i]; 2098 } 2099 2100 if (!rank) { 2101 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2102 2103 /* read in my part of the matrix numerical values */ 2104 nz = procsnz[0]; 2105 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2106 2107 /* insert into matrix */ 2108 jj = rstart; 2109 smycols = mycols; 2110 svals = vals; 2111 for (i=0; i<m; i++) { 2112 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2113 smycols += ourlens[i]; 2114 svals += ourlens[i]; 2115 jj++; 2116 } 2117 2118 /* read in other processors and ship out */ 2119 for (i=1; i<size; i++) { 2120 nz = procsnz[i]; 2121 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2122 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2123 } 2124 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2125 } else { 2126 /* receive numeric values */ 2127 ierr = 
PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2128 2129 /* receive message of values*/ 2130 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2131 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2132 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2133 2134 /* insert into matrix */ 2135 jj = rstart; 2136 smycols = mycols; 2137 svals = vals; 2138 for (i=0; i<m; i++) { 2139 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2140 smycols += ourlens[i]; 2141 svals += ourlens[i]; 2142 jj++; 2143 } 2144 } 2145 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2146 ierr = PetscFree(vals);CHKERRQ(ierr); 2147 ierr = PetscFree(mycols);CHKERRQ(ierr); 2148 ierr = PetscFree(rowners);CHKERRQ(ierr); 2149 2150 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2151 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2152 *newmat = A; 2153 PetscFunctionReturn(0); 2154 } 2155 2156 #undef __FUNCT__ 2157 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2158 /* 2159 Not great since it makes two copies of the submatrix, first an SeqAIJ 2160 in local and then by concatenating the local matrices the end result. 2161 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2162 */ 2163 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2164 { 2165 PetscErrorCode ierr; 2166 PetscMPIInt rank,size; 2167 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2168 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2169 Mat *local,M,Mreuse; 2170 PetscScalar *vwork,*aa; 2171 MPI_Comm comm = mat->comm; 2172 Mat_SeqAIJ *aij; 2173 2174 2175 PetscFunctionBegin; 2176 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2177 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2178 2179 if (call == MAT_REUSE_MATRIX) { 2180 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2181 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2182 local = &Mreuse; 2183 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2184 } else { 2185 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2186 Mreuse = *local; 2187 ierr = PetscFree(local);CHKERRQ(ierr); 2188 } 2189 2190 /* 2191 m - number of local rows 2192 n - number of columns (same on all processors) 2193 rstart - first row in new global matrix generated 2194 */ 2195 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2196 if (call == MAT_INITIAL_MATRIX) { 2197 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2198 ii = aij->i; 2199 jj = aij->j; 2200 2201 /* 2202 Determine the number of non-zeros in the diagonal and off-diagonal 2203 portions of the matrix in order to do correct preallocation 2204 */ 2205 2206 /* first get start and end of "diagonal" columns */ 2207 if (csize == PETSC_DECIDE) { 2208 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2209 if (mglobal == n) { /* square matrix */ 2210 nlocal = m; 2211 } else { 2212 nlocal = n/size + ((n % size) > rank); 2213 } 2214 } else { 2215 nlocal = csize; 2216 } 2217 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2218 rstart = rend - nlocal; 2219 if (rank == size - 1 && rend != n) { 2220 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2221 } 2222 2223 /* next, compute all the lengths */ 2224 ierr = 
PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2225 olens = dlens + m; 2226 for (i=0; i<m; i++) { 2227 jend = ii[i+1] - ii[i]; 2228 olen = 0; 2229 dlen = 0; 2230 for (j=0; j<jend; j++) { 2231 if (*jj < rstart || *jj >= rend) olen++; 2232 else dlen++; 2233 jj++; 2234 } 2235 olens[i] = olen; 2236 dlens[i] = dlen; 2237 } 2238 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2239 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 2240 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2241 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2242 ierr = PetscFree(dlens);CHKERRQ(ierr); 2243 } else { 2244 PetscInt ml,nl; 2245 2246 M = *newmat; 2247 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2248 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2249 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2250 /* 2251 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2252 rather than the slower MatSetValues(). 2253 */ 2254 M->was_assembled = PETSC_TRUE; 2255 M->assembled = PETSC_FALSE; 2256 } 2257 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2258 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2259 ii = aij->i; 2260 jj = aij->j; 2261 aa = aij->a; 2262 for (i=0; i<m; i++) { 2263 row = rstart + i; 2264 nz = ii[i+1] - ii[i]; 2265 cwork = jj; jj += nz; 2266 vwork = aa; aa += nz; 2267 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2268 } 2269 2270 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2271 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2272 *newmat = M; 2273 2274 /* save submatrix used in processor for next request */ 2275 if (call == MAT_INITIAL_MATRIX) { 2276 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2277 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2278 } 2279 2280 PetscFunctionReturn(0); 2281 } 2282 2283 EXTERN_C_BEGIN 2284 #undef __FUNCT__ 2285 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2286 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2287 { 2288 PetscInt m,cstart, cend,j,nnz,i,d; 2289 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 2290 const PetscInt *JJ; 2291 PetscScalar *values; 2292 PetscErrorCode ierr; 2293 2294 PetscFunctionBegin; 2295 if (I[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"I[0] must be 0 it is %D",I[0]); 2296 2297 B->rmap.bs = B->cmap.bs = 1; 2298 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 2299 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 2300 m = B->rmap.n; 2301 cstart = B->cmap.rstart; 2302 cend = B->cmap.rend; 2303 rstart = B->rmap.rstart; 2304 2305 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2306 o_nnz = d_nnz + m; 2307 2308 for (i=0; i<m; i++) { 2309 nnz = I[i+1]- I[i]; 2310 JJ = J + I[i]; 2311 nnz_max = PetscMax(nnz_max,nnz); 2312 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 2313 for (j=0; j<nnz; j++) { 2314 if (*JJ >= cstart) break; 2315 JJ++; 2316 } 2317 d = 0; 2318 for (; j<nnz; j++) { 2319 if (*JJ++ >= cend) break; 2320 d++; 2321 } 2322 d_nnz[i] = d; 2323 o_nnz[i] = nnz - d; 2324 } 2325 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2326 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2327 2328 if (v) values = (PetscScalar*)v; 2329 else { 2330 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 
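/* no values were supplied (v is null): the buffer allocated above is zeroed below and reused for every row, so only the nonzero pattern is inserted */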
2331 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2332 } 2333 2334 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2335 for (i=0; i<m; i++) { 2336 ii = i + rstart; 2337 nnz = I[i+1]- I[i]; 2338 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values+(v ? I[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2339 } 2340 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2341 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2342 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2343 2344 if (!v) { 2345 ierr = PetscFree(values);CHKERRQ(ierr); 2346 } 2347 PetscFunctionReturn(0); 2348 } 2349 EXTERN_C_END 2350 2351 #undef __FUNCT__ 2352 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2353 /*@ 2354 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2355 (the default parallel PETSc format). 2356 2357 Collective on MPI_Comm 2358 2359 Input Parameters: 2360 + B - the matrix 2361 . i - the indices into j for the start of each local row (starts with zero) 2362 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2363 - v - optional values in the matrix 2364 2365 Level: developer 2366 2367 Notes: this actually copies the values from i[], j[], and a[] to put them into PETSc's internal 2368 storage format. Thus changing the values in a[] after this call will not effect the matrix values. 2369 2370 .keywords: matrix, aij, compressed row, sparse, parallel 2371 2372 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 2373 MatCreateSeqAIJWithArrays() 2374 @*/ 2375 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2376 { 2377 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2378 2379 PetscFunctionBegin; 2380 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2381 if (f) { 2382 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2383 } 2384 PetscFunctionReturn(0); 2385 } 2386 2387 #undef __FUNCT__ 2388 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2389 /*@C 2390 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2391 (the default parallel PETSc format). For good matrix assembly performance 2392 the user should preallocate the matrix storage by setting the parameters 2393 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2394 performance can be increased by more than a factor of 50. 2395 2396 Collective on MPI_Comm 2397 2398 Input Parameters: 2399 + A - the matrix 2400 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2401 (same value is used for all local rows) 2402 . d_nnz - array containing the number of nonzeros in the various rows of the 2403 DIAGONAL portion of the local submatrix (possibly different for each row) 2404 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2405 The size of this array is equal to the number of local rows, i.e 'm'. 2406 You must leave room for the diagonal entry even if it is zero. 2407 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2408 submatrix (same value is used for all local rows). 
2409 - o_nnz - array containing the number of nonzeros in the various rows of the 2410 OFF-DIAGONAL portion of the local submatrix (possibly different for 2411 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2412 structure. The size of this array is equal to the number 2413 of local rows, i.e 'm'. 2414 2415 If the *_nnz parameter is given then the *_nz parameter is ignored 2416 2417 The AIJ format (also called the Yale sparse matrix format or 2418 compressed row storage (CSR)), is fully compatible with standard Fortran 77 2419 storage. The stored row and column indices begin with zero. See the users manual for details. 2420 2421 The parallel matrix is partitioned such that the first m0 rows belong to 2422 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2423 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2424 2425 The DIAGONAL portion of the local submatrix of a processor can be defined 2426 as the submatrix which is obtained by extraction the part corresponding 2427 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2428 first row that belongs to the processor, and r2 is the last row belonging 2429 to the this processor. This is a square mxm matrix. The remaining portion 2430 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2431 2432 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2433 2434 Example usage: 2435 2436 Consider the following 8x8 matrix with 34 non-zero values, that is 2437 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2438 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2439 as follows: 2440 2441 .vb 2442 1 2 0 | 0 3 0 | 0 4 2443 Proc0 0 5 6 | 7 0 0 | 8 0 2444 9 0 10 | 11 0 0 | 12 0 2445 ------------------------------------- 2446 13 0 14 | 15 16 17 | 0 0 2447 Proc1 0 18 0 | 19 20 21 | 0 0 2448 0 0 0 | 22 23 0 | 24 0 2449 ------------------------------------- 2450 Proc2 25 26 27 | 0 0 28 | 29 0 2451 30 0 0 | 31 32 33 | 0 34 2452 .ve 2453 2454 This can be represented as a collection of submatrices as: 2455 2456 .vb 2457 A B C 2458 D E F 2459 G H I 2460 .ve 2461 2462 Where the submatrices A,B,C are owned by proc0, D,E,F are 2463 owned by proc1, G,H,I are owned by proc2. 2464 2465 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2466 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2467 The 'M','N' parameters are 8,8, and have the same values on all procs. 2468 2469 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2470 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2471 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2472 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2473 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2474 matrix, ans [DF] as another SeqAIJ matrix. 2475 2476 When d_nz, o_nz parameters are specified, d_nz storage elements are 2477 allocated for every row of the local diagonal submatrix, and o_nz 2478 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2479 One way to choose d_nz and o_nz is to use the max nonzerors per local 2480 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2481 In this case, the values of d_nz,o_nz are: 2482 .vb 2483 proc0 : dnz = 2, o_nz = 2 2484 proc1 : dnz = 3, o_nz = 2 2485 proc2 : dnz = 1, o_nz = 4 2486 .ve 2487 We are allocating m*(d_nz+o_nz) storage locations for every proc. 
This 2488 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2489 for proc3. i.e we are using 12+15+10=37 storage locations to store 2490 34 values. 2491 2492 When d_nnz, o_nnz parameters are specified, the storage is specified 2493 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2494 In the above case the values for d_nnz,o_nnz are: 2495 .vb 2496 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2497 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2498 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2499 .ve 2500 Here the space allocated is sum of all the above values i.e 34, and 2501 hence pre-allocation is perfect. 2502 2503 Level: intermediate 2504 2505 .keywords: matrix, aij, compressed row, sparse, parallel 2506 2507 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 2508 MPIAIJ 2509 @*/ 2510 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2511 { 2512 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2513 2514 PetscFunctionBegin; 2515 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2516 if (f) { 2517 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2518 } 2519 PetscFunctionReturn(0); 2520 } 2521 2522 #undef __FUNCT__ 2523 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 2524 /*@C 2525 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 2526 CSR format the local rows. 2527 2528 Collective on MPI_Comm 2529 2530 Input Parameters: 2531 + comm - MPI communicator 2532 . m - number of local rows (Cannot be PETSC_DECIDE) 2533 . n - This value should be the same as the local size used in creating the 2534 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2535 calculated if N is given) For square matrices n is almost always m. 2536 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2537 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2538 . i - row indices 2539 . j - column indices 2540 - a - matrix values 2541 2542 Output Parameter: 2543 . mat - the matrix 2544 Level: intermediate 2545 2546 Notes: 2547 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 2548 thus you CANNOT change the matrix entries by changing the values of a[] after you have 2549 called this routine. 
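       As an illustrative sketch only (the values below are made up and are not part of any example on this page), a process owning 2 rows of a matrix with 6 global columns might call
.vb
      PetscInt    i[] = {0,2,3};
      PetscInt    j[] = {0,5,2};
      PetscScalar a[] = {1.0,2.0,3.0};
      Mat         A;
      ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,6,i,j,a,&A);CHKERRQ(ierr);
.ve
       where row 0 stores entries in columns 0 and 5 and row 1 stores an entry in column 2; the column indices of each row must be sorted, as required by MatMPIAIJSetPreallocationCSR().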
2550 2551 The i and j indices are 0 based. 2552 2553 .keywords: matrix, aij, compressed row, sparse, parallel 2554 2555 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2556 MPIAIJ, MatCreateMPIAIJ() 2557 @*/ 2558 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 2559 { 2560 PetscErrorCode ierr; 2561 2562 PetscFunctionBegin; 2563 if (i[0]) { 2564 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2565 } 2566 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2567 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2568 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 2569 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 2570 PetscFunctionReturn(0); 2571 } 2572 2573 #undef __FUNCT__ 2574 #define __FUNCT__ "MatCreateMPIAIJ" 2575 /*@C 2576 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2577 (the default parallel PETSc format). For good matrix assembly performance 2578 the user should preallocate the matrix storage by setting the parameters 2579 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2580 performance can be increased by more than a factor of 50. 2581 2582 Collective on MPI_Comm 2583 2584 Input Parameters: 2585 + comm - MPI communicator 2586 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2587 This value should be the same as the local size used in creating the 2588 y vector for the matrix-vector product y = Ax. 2589 . n - This value should be the same as the local size used in creating the 2590 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2591 calculated if N is given) For square matrices n is almost always m. 2592 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2593 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2594 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2595 (same value is used for all local rows) 2596 . d_nnz - array containing the number of nonzeros in the various rows of the 2597 DIAGONAL portion of the local submatrix (possibly different for each row) 2598 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2599 The size of this array is equal to the number of local rows, i.e. 'm'. 2600 You must leave room for the diagonal entry even if it is zero. 2601 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2602 submatrix (same value is used for all local rows). 2603 - o_nnz - array containing the number of nonzeros in the various rows of the 2604 OFF-DIAGONAL portion of the local submatrix (possibly different for 2605 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2606 structure. The size of this array is equal to the number 2607 of local rows, i.e. 'm'. 2608 2609 Output Parameter: 2610 . A - the matrix 2611 2612 Notes: 2613 If the *_nnz parameter is given then the *_nz parameter is ignored. 2614 2615 The m,n,M,N parameters specify the size of the matrix, and its partitioning across 2616 processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2617 storage requirements for this matrix. 
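       For instance (a sketch, not a statement of required usage), a square matrix distributed by rows with at most 5 nonzeros per row in the diagonal block and at most 2 per row in the off-diagonal block can be created with
.vb
      ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,m,m,PETSC_DETERMINE,PETSC_DETERMINE,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
.ve
       where m is the number of rows owned by this process and A is the resulting matrix.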
2618 2619 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2620 processor then it must be used on all processors that share the object for 2621 that argument. 2622 2623 The user MUST specify either the local or global matrix dimensions 2624 (possibly both). 2625 2626 The parallel matrix is partitioned such that the first m0 rows belong to 2627 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2628 to process 2 etc., where m0,m1,m2... are the input parameter 'm'. 2629 2630 The DIAGONAL portion of the local submatrix of a processor can be defined 2631 as the submatrix which is obtained by extracting the part corresponding 2632 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2633 first row that belongs to the processor, and r2 is the last row belonging 2634 to this processor. This is a square mxm matrix. The remaining portion 2635 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 2636 2637 If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored. 2638 2639 When calling this routine with a single process communicator, a matrix of 2640 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2641 type of communicator, use the construction mechanism: 2642 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...); 2643 2644 By default, this format uses inodes (identical nodes) when possible. 2645 We search for consecutive rows with the same nonzero structure, thereby 2646 reusing matrix information to achieve increased efficiency. 2647 2648 Options Database Keys: 2649 + -mat_no_inode - Do not use inodes 2650 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 2651 - -mat_aij_oneindex - Internally use indexing starting at 1 2652 rather than 0. Note that when calling MatSetValues(), 2653 the user still MUST index entries starting at 0! 2654 2655 2656 Example usage: 2657 2658 Consider the following 8x8 matrix with 34 non-zero values, that is 2659 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2660 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2661 as follows: 2662 2663 .vb 2664 1 2 0 | 0 3 0 | 0 4 2665 Proc0 0 5 6 | 7 0 0 | 8 0 2666 9 0 10 | 11 0 0 | 12 0 2667 ------------------------------------- 2668 13 0 14 | 15 16 17 | 0 0 2669 Proc1 0 18 0 | 19 20 21 | 0 0 2670 0 0 0 | 22 23 0 | 24 0 2671 ------------------------------------- 2672 Proc2 25 26 27 | 0 0 28 | 29 0 2673 30 0 0 | 31 32 33 | 0 34 2674 .ve 2675 2676 This can be represented as a collection of submatrices as: 2677 2678 .vb 2679 A B C 2680 D E F 2681 G H I 2682 .ve 2683 2684 Where the submatrices A,B,C are owned by proc0, D,E,F are 2685 owned by proc1, G,H,I are owned by proc2. 2686 2687 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2688 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2689 The 'M','N' parameters are 8,8, and have the same values on all procs. 2690 2691 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2692 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2693 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2694 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2695 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 2696 matrix, and [DF] as another SeqAIJ matrix. 
2697 2698 When d_nz, o_nz parameters are specified, d_nz storage elements are 2699 allocated for every row of the local diagonal submatrix, and o_nz 2700 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2701 One way to choose d_nz and o_nz is to use the max nonzerors per local 2702 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2703 In this case, the values of d_nz,o_nz are: 2704 .vb 2705 proc0 : dnz = 2, o_nz = 2 2706 proc1 : dnz = 3, o_nz = 2 2707 proc2 : dnz = 1, o_nz = 4 2708 .ve 2709 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2710 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2711 for proc3. i.e we are using 12+15+10=37 storage locations to store 2712 34 values. 2713 2714 When d_nnz, o_nnz parameters are specified, the storage is specified 2715 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2716 In the above case the values for d_nnz,o_nnz are: 2717 .vb 2718 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2719 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2720 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2721 .ve 2722 Here the space allocated is sum of all the above values i.e 34, and 2723 hence pre-allocation is perfect. 2724 2725 Level: intermediate 2726 2727 .keywords: matrix, aij, compressed row, sparse, parallel 2728 2729 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2730 MPIAIJ, MatCreateMPIAIJWithArrays() 2731 @*/ 2732 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2733 { 2734 PetscErrorCode ierr; 2735 PetscMPIInt size; 2736 2737 PetscFunctionBegin; 2738 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2739 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2740 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2741 if (size > 1) { 2742 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2743 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2744 } else { 2745 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2746 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2747 } 2748 PetscFunctionReturn(0); 2749 } 2750 2751 #undef __FUNCT__ 2752 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2753 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2754 { 2755 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2756 2757 PetscFunctionBegin; 2758 *Ad = a->A; 2759 *Ao = a->B; 2760 *colmap = a->garray; 2761 PetscFunctionReturn(0); 2762 } 2763 2764 #undef __FUNCT__ 2765 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2766 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2767 { 2768 PetscErrorCode ierr; 2769 PetscInt i; 2770 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2771 2772 PetscFunctionBegin; 2773 if (coloring->ctype == IS_COLORING_LOCAL) { 2774 ISColoringValue *allcolors,*colors; 2775 ISColoring ocoloring; 2776 2777 /* set coloring for diagonal portion */ 2778 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2779 2780 /* set coloring for off-diagonal portion */ 2781 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2782 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2783 for (i=0; i<a->B->cmap.n; i++) { 2784 colors[i] = allcolors[a->garray[i]]; 2785 } 2786 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2787 ierr = 
ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 2788 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2789 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2790 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2791 ISColoringValue *colors; 2792 PetscInt *larray; 2793 ISColoring ocoloring; 2794 2795 /* set coloring for diagonal portion */ 2796 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2797 for (i=0; i<a->A->cmap.n; i++) { 2798 larray[i] = i + A->cmap.rstart; 2799 } 2800 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2801 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2802 for (i=0; i<a->A->cmap.n; i++) { 2803 colors[i] = coloring->colors[larray[i]]; 2804 } 2805 ierr = PetscFree(larray);CHKERRQ(ierr); 2806 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 2807 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2808 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2809 2810 /* set coloring for off-diagonal portion */ 2811 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2812 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2813 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2814 for (i=0; i<a->B->cmap.n; i++) { 2815 colors[i] = coloring->colors[larray[i]]; 2816 } 2817 ierr = PetscFree(larray);CHKERRQ(ierr); 2818 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 2819 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2820 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2821 } else { 2822 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 2823 } 2824 2825 PetscFunctionReturn(0); 2826 } 2827 2828 #if defined(PETSC_HAVE_ADIC) 2829 #undef __FUNCT__ 2830 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2831 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2832 { 2833 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2834 PetscErrorCode ierr; 2835 2836 PetscFunctionBegin; 2837 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2838 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2839 PetscFunctionReturn(0); 2840 } 2841 #endif 2842 2843 #undef __FUNCT__ 2844 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2845 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 2846 { 2847 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2848 PetscErrorCode ierr; 2849 2850 PetscFunctionBegin; 2851 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2852 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2853 PetscFunctionReturn(0); 2854 } 2855 2856 #undef __FUNCT__ 2857 #define __FUNCT__ "MatMerge" 2858 /*@C 2859 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2860 matrices from each processor 2861 2862 Collective on MPI_Comm 2863 2864 Input Parameters: 2865 + comm - the communicators the parallel matrix will live on 2866 . inmat - the input sequential matrices 2867 . n - number of local columns (or PETSC_DECIDE) 2868 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2869 2870 Output Parameter: 2871 . outmat - the parallel matrix generated 2872 2873 Level: advanced 2874 2875 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
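       A typical call (illustrative only; inmat is the SEQAIJ matrix holding this process's rows) is
.vb
      ierr = MatMerge(PETSC_COMM_WORLD,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);CHKERRQ(ierr);
.ve
       Note that the input matrix is destroyed by this routine (see the MatDestroy() call in the implementation below), so it must not be used afterwards.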
2876 2877 @*/ 2878 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2879 { 2880 PetscErrorCode ierr; 2881 PetscInt m,N,i,rstart,nnz,I,*dnz,*onz; 2882 PetscInt *indx; 2883 PetscScalar *values; 2884 2885 PetscFunctionBegin; 2886 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2887 if (scall == MAT_INITIAL_MATRIX){ 2888 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2889 if (n == PETSC_DECIDE){ 2890 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 2891 } 2892 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2893 rstart -= m; 2894 2895 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2896 for (i=0;i<m;i++) { 2897 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2898 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2899 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2900 } 2901 /* This routine will ONLY return MPIAIJ type matrix */ 2902 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 2903 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2904 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2905 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2906 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2907 2908 } else if (scall == MAT_REUSE_MATRIX){ 2909 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2910 } else { 2911 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 2912 } 2913 2914 for (i=0;i<m;i++) { 2915 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2916 I = i + rstart; 2917 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2918 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2919 } 2920 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2921 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2922 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2923 2924 PetscFunctionReturn(0); 2925 } 2926 2927 #undef __FUNCT__ 2928 #define __FUNCT__ "MatFileSplit" 2929 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2930 { 2931 PetscErrorCode ierr; 2932 PetscMPIInt rank; 2933 PetscInt m,N,i,rstart,nnz; 2934 size_t len; 2935 const PetscInt *indx; 2936 PetscViewer out; 2937 char *name; 2938 Mat B; 2939 const PetscScalar *values; 2940 2941 PetscFunctionBegin; 2942 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2943 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2944 /* Should this be the type of the diagonal block of A? 
*/ 2945 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2946 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 2947 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2948 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2949 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2950 for (i=0;i<m;i++) { 2951 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2952 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2953 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2954 } 2955 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2956 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2957 2958 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2959 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2960 ierr = PetscMalloc((len+16)*sizeof(char),&name);CHKERRQ(ierr); 2961 sprintf(name,"%s.%d",outfile,rank); 2962 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 2963 ierr = PetscFree(name);CHKERRQ(ierr); 2964 ierr = MatView(B,out);CHKERRQ(ierr); 2965 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2966 ierr = MatDestroy(B);CHKERRQ(ierr); 2967 PetscFunctionReturn(0); 2968 } 2969 2970 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 2971 #undef __FUNCT__ 2972 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 2973 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 2974 { 2975 PetscErrorCode ierr; 2976 Mat_Merge_SeqsToMPI *merge = PETSC_NULL; 2977 PetscObjectContainer container; 2978 2979 PetscFunctionBegin; 2980 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2981 if (container) { 2982 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2983 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 2984 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 2985 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 2986 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 2987 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 2988 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 2989 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 2990 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 2991 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 2992 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 2993 ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr); 2994 2995 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 2996 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 2997 } 2998 /* free the merge context only if it was actually attached to this matrix */ if (merge) {ierr = PetscFree(merge);CHKERRQ(ierr);} 2999 3000 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3001 PetscFunctionReturn(0); 3002 } 3003 3004 #include "src/mat/utils/freespace.h" 3005 #include "petscbt.h" 3006 static PetscEvent logkey_seqstompinum = 0; 3007 #undef __FUNCT__ 3008 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 3009 /*@C 3010 MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential 3011 matrices from each processor 3012 3013 Collective on MPI_Comm 3014 3015 Input Parameters: 3016 + comm - the communicator the parallel matrix will live on 3017 . seqmat - the input sequential matrices 3018 . m - number of local rows (or PETSC_DECIDE) 3019 . n - number of local columns (or PETSC_DECIDE) 3020 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3021 3022 Output Parameter: 3023 . mpimat - the parallel matrix generated 3024 3025 Level: advanced 3026 3027 Notes: 3028 The dimensions of the sequential matrix in each processor MUST be the same. 
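       The operation is implemented as a symbolic phase followed by a numeric phase; a sketch of the combined usage (variable names are illustrative) is
.vb
      ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&mpimat);CHKERRQ(ierr);
      ierr = MatMerge_SeqsToMPINumeric(seqmat,mpimat);CHKERRQ(ierr);
.ve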
3029 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 3030 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 3031 @*/ 3032 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 3033 { 3034 PetscErrorCode ierr; 3035 MPI_Comm comm=mpimat->comm; 3036 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3037 PetscMPIInt size,rank,taga,*len_s; 3038 PetscInt N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j; 3039 PetscInt proc,m; 3040 PetscInt **buf_ri,**buf_rj; 3041 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3042 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3043 MPI_Request *s_waits,*r_waits; 3044 MPI_Status *status; 3045 MatScalar *aa=a->a,**abuf_r,*ba_i; 3046 Mat_Merge_SeqsToMPI *merge; 3047 PetscObjectContainer container; 3048 3049 PetscFunctionBegin; 3050 if (!logkey_seqstompinum) { 3051 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 3052 } 3053 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3054 3055 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3056 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3057 3058 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3059 if (container) { 3060 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3061 } 3062 bi = merge->bi; 3063 bj = merge->bj; 3064 buf_ri = merge->buf_ri; 3065 buf_rj = merge->buf_rj; 3066 3067 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3068 owners = merge->rowmap.range; 3069 len_s = merge->len_s; 3070 3071 /* send and recv matrix values */ 3072 /*-----------------------------*/ 3073 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3074 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3075 3076 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 3077 for (proc=0,k=0; proc<size; proc++){ 3078 if (!len_s[proc]) continue; 3079 i = owners[proc]; 3080 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 3081 k++; 3082 } 3083 3084 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 3085 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 3086 ierr = PetscFree(status);CHKERRQ(ierr); 3087 3088 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3089 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3090 3091 /* insert mat values of mpimat */ 3092 /*----------------------------*/ 3093 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 3094 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3095 nextrow = buf_ri_k + merge->nrecv; 3096 nextai = nextrow + merge->nrecv; 3097 3098 for (k=0; k<merge->nrecv; k++){ 3099 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3100 nrows = *(buf_ri_k[k]); 3101 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 3102 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3103 } 3104 3105 /* set values of ba */ 3106 m = merge->rowmap.n; 3107 for (i=0; i<m; i++) { 3108 arow = owners[rank] + i; 3109 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 3110 bnzi = bi[i+1] - bi[i]; 3111 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 3112 3113 /* add local non-zero vals of this proc's seqmat into ba */ 3114 anzi = 
ai[arow+1] - ai[arow]; 3115 aj = a->j + ai[arow]; 3116 aa = a->a + ai[arow]; 3117 nextaj = 0; 3118 for (j=0; nextaj<anzi; j++){ 3119 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3120 ba_i[j] += aa[nextaj++]; 3121 } 3122 } 3123 3124 /* add received vals into ba */ 3125 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3126 /* i-th row */ 3127 if (i == *nextrow[k]) { 3128 anzi = *(nextai[k]+1) - *nextai[k]; 3129 aj = buf_rj[k] + *(nextai[k]); 3130 aa = abuf_r[k] + *(nextai[k]); 3131 nextaj = 0; 3132 for (j=0; nextaj<anzi; j++){ 3133 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3134 ba_i[j] += aa[nextaj++]; 3135 } 3136 } 3137 nextrow[k]++; nextai[k]++; 3138 } 3139 } 3140 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3141 } 3142 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3143 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3144 3145 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3146 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3147 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3148 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3149 PetscFunctionReturn(0); 3150 } 3151 3152 static PetscEvent logkey_seqstompisym = 0; 3153 #undef __FUNCT__ 3154 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 3155 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3156 { 3157 PetscErrorCode ierr; 3158 Mat B_mpi; 3159 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3160 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3161 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3162 PetscInt M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j; 3163 PetscInt len,proc,*dnz,*onz; 3164 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3165 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3166 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3167 MPI_Status *status; 3168 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3169 PetscBT lnkbt; 3170 Mat_Merge_SeqsToMPI *merge; 3171 PetscObjectContainer container; 3172 3173 PetscFunctionBegin; 3174 if (!logkey_seqstompisym) { 3175 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3176 } 3177 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3178 3179 /* make sure it is a PETSc comm */ 3180 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 3181 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3182 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3183 3184 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3185 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3186 3187 /* determine row ownership */ 3188 /*---------------------------------------------------------*/ 3189 merge->rowmap.n = m; 3190 merge->rowmap.N = M; 3191 merge->rowmap.bs = 1; 3192 ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 3193 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3194 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3195 3196 m = merge->rowmap.n; 3197 M = merge->rowmap.N; 3198 owners = merge->rowmap.range; 3199 3200 /* determine the number of messages to send, their lengths */ 3201 /*---------------------------------------------------------*/ 3202 len_s = merge->len_s; 3203 3204 len = 0; /* length of buf_si[] */ 3205 merge->nsend = 0; 3206 for (proc=0; proc<size; proc++){ 3207 len_si[proc] = 0; 3208 if (proc == rank){ 3209 
len_s[proc] = 0; 3210 } else { 3211 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3212 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3213 } 3214 if (len_s[proc]) { 3215 merge->nsend++; 3216 nrows = 0; 3217 for (i=owners[proc]; i<owners[proc+1]; i++){ 3218 if (ai[i+1] > ai[i]) nrows++; 3219 } 3220 len_si[proc] = 2*(nrows+1); 3221 len += len_si[proc]; 3222 } 3223 } 3224 3225 /* determine the number and length of messages to receive for ij-structure */ 3226 /*-------------------------------------------------------------------------*/ 3227 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3228 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3229 3230 /* post the Irecv of j-structure */ 3231 /*-------------------------------*/ 3232 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 3233 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3234 3235 /* post the Isend of j-structure */ 3236 /*--------------------------------*/ 3237 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3238 sj_waits = si_waits + merge->nsend; 3239 3240 for (proc=0, k=0; proc<size; proc++){ 3241 if (!len_s[proc]) continue; 3242 i = owners[proc]; 3243 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3244 k++; 3245 } 3246 3247 /* receives and sends of j-structure are complete */ 3248 /*------------------------------------------------*/ 3249 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 3250 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 3251 3252 /* send and recv i-structure */ 3253 /*---------------------------*/ 3254 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 3255 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3256 3257 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3258 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3259 for (proc=0,k=0; proc<size; proc++){ 3260 if (!len_s[proc]) continue; 3261 /* form outgoing message for i-structure: 3262 buf_si[0]: nrows to be sent 3263 [1:nrows]: row index (global) 3264 [nrows+1:2*nrows+1]: i-structure index 3265 */ 3266 /*-------------------------------------------*/ 3267 nrows = len_si[proc]/2 - 1; 3268 buf_si_i = buf_si + nrows+1; 3269 buf_si[0] = nrows; 3270 buf_si_i[0] = 0; 3271 nrows = 0; 3272 for (i=owners[proc]; i<owners[proc+1]; i++){ 3273 anzi = ai[i+1] - ai[i]; 3274 if (anzi) { 3275 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3276 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3277 nrows++; 3278 } 3279 } 3280 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3281 k++; 3282 buf_si += len_si[proc]; 3283 } 3284 3285 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 3286 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 3287 3288 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 3289 for (i=0; i<merge->nrecv; i++){ 3290 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 3291 } 3292 3293 ierr = PetscFree(len_si);CHKERRQ(ierr); 3294 ierr = 
PetscFree(len_ri);CHKERRQ(ierr); 3295 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 3296 ierr = PetscFree(si_waits);CHKERRQ(ierr); 3297 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3298 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3299 ierr = PetscFree(status);CHKERRQ(ierr); 3300 3301 /* compute a local seq matrix in each processor */ 3302 /*----------------------------------------------*/ 3303 /* allocate bi array and free space for accumulating nonzero column info */ 3304 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3305 bi[0] = 0; 3306 3307 /* create and initialize a linked list */ 3308 nlnk = N+1; 3309 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3310 3311 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3312 len = 0; 3313 len = ai[owners[rank+1]] - ai[owners[rank]]; 3314 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3315 current_space = free_space; 3316 3317 /* determine symbolic info for each local row */ 3318 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3319 nextrow = buf_ri_k + merge->nrecv; 3320 nextai = nextrow + merge->nrecv; 3321 for (k=0; k<merge->nrecv; k++){ 3322 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 3323 nrows = *buf_ri_k[k]; 3324 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */ 3325 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */ 3326 } 3327 3328 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3329 len = 0; 3330 for (i=0;i<m;i++) { 3331 bnzi = 0; 3332 /* add local non-zero cols of this proc's seqmat into lnk */ 3333 arow = owners[rank] + i; 3334 anzi = ai[arow+1] - ai[arow]; 3335 aj = a->j + ai[arow]; 3336 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3337 bnzi += nlnk; 3338 /* add received col data into lnk */ 3339 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3340 if (i == *nextrow[k]) { /* i-th row */ 3341 anzi = *(nextai[k]+1) - *nextai[k]; 3342 aj = buf_rj[k] + *nextai[k]; 3343 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3344 bnzi += nlnk; 3345 nextrow[k]++; nextai[k]++; 3346 } 3347 } 3348 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3349 3350 /* if free space is not available, make more free space */ 3351 if (current_space->local_remaining<bnzi) { 3352 ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr); 3353 nspacedouble++; 3354 } 3355 /* copy data into free space, then initialize lnk */ 3356 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3357 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3358 3359 current_space->array += bnzi; 3360 current_space->local_used += bnzi; 3361 current_space->local_remaining -= bnzi; 3362 3363 bi[i+1] = bi[i] + bnzi; 3364 } 3365 3366 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3367 3368 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3369 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3370 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3371 3372 /* create symbolic parallel matrix B_mpi */ 3373 /*---------------------------------------*/ 3374 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 3375 if (n==PETSC_DECIDE) { 3376 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 3377 } else { 3378 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3379 } 3380 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3381 ierr =
MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3382 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3383 3384 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3385 B_mpi->assembled = PETSC_FALSE; 3386 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3387 merge->bi = bi; 3388 merge->bj = bj; 3389 merge->buf_ri = buf_ri; 3390 merge->buf_rj = buf_rj; 3391 merge->coi = PETSC_NULL; 3392 merge->coj = PETSC_NULL; 3393 merge->owners_co = PETSC_NULL; 3394 3395 /* attach the supporting struct to B_mpi for reuse */ 3396 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3397 ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr); 3398 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3399 *mpimat = B_mpi; 3400 3401 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 3402 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3403 PetscFunctionReturn(0); 3404 } 3405 3406 static PetscEvent logkey_seqstompi = 0; 3407 #undef __FUNCT__ 3408 #define __FUNCT__ "MatMerge_SeqsToMPI" 3409 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3410 { 3411 PetscErrorCode ierr; 3412 3413 PetscFunctionBegin; 3414 if (!logkey_seqstompi) { 3415 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3416 } 3417 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3418 if (scall == MAT_INITIAL_MATRIX){ 3419 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3420 } 3421 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3422 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3423 PetscFunctionReturn(0); 3424 } 3425 static PetscEvent logkey_getlocalmat = 0; 3426 #undef __FUNCT__ 3427 #define __FUNCT__ "MatGetLocalMat" 3428 /*@C 3429 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3430 3431 Not Collective 3432 3433 Input Parameters: 3434 + A - the matrix 3435 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3436 3437 Output Parameter: 3438 . 
A_loc - the local sequential matrix generated 3439 3440 Level: developer 3441 3442 @*/ 3443 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3444 { 3445 PetscErrorCode ierr; 3446 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3447 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3448 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3449 PetscScalar *aa=a->a,*ba=b->a,*ca; 3450 PetscInt am=A->rmap.n,i,j,k,cstart=A->cmap.rstart; 3451 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3452 3453 PetscFunctionBegin; 3454 if (!logkey_getlocalmat) { 3455 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3456 } 3457 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3458 if (scall == MAT_INITIAL_MATRIX){ 3459 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 3460 ci[0] = 0; 3461 for (i=0; i<am; i++){ 3462 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3463 } 3464 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3465 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 3466 k = 0; 3467 for (i=0; i<am; i++) { 3468 ncols_o = bi[i+1] - bi[i]; 3469 ncols_d = ai[i+1] - ai[i]; 3470 /* off-diagonal portion of A */ 3471 for (jo=0; jo<ncols_o; jo++) { 3472 col = cmap[*bj]; 3473 if (col >= cstart) break; 3474 cj[k] = col; bj++; 3475 ca[k++] = *ba++; 3476 } 3477 /* diagonal portion of A */ 3478 for (j=0; j<ncols_d; j++) { 3479 cj[k] = cstart + *aj++; 3480 ca[k++] = *aa++; 3481 } 3482 /* off-diagonal portion of A */ 3483 for (j=jo; j<ncols_o; j++) { 3484 cj[k] = cmap[*bj++]; 3485 ca[k++] = *ba++; 3486 } 3487 } 3488 /* put together the new matrix */ 3489 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr); 3490 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3491 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3492 mat = (Mat_SeqAIJ*)(*A_loc)->data; 3493 mat->freedata = PETSC_TRUE; 3494 mat->nonew = 0; 3495 } else if (scall == MAT_REUSE_MATRIX){ 3496 mat=(Mat_SeqAIJ*)(*A_loc)->data; 3497 ci = mat->i; cj = mat->j; ca = mat->a; 3498 for (i=0; i<am; i++) { 3499 /* off-diagonal portion of A */ 3500 ncols_o = bi[i+1] - bi[i]; 3501 for (jo=0; jo<ncols_o; jo++) { 3502 col = cmap[*bj]; 3503 if (col >= cstart) break; 3504 *ca++ = *ba++; bj++; 3505 } 3506 /* diagonal portion of A */ 3507 ncols_d = ai[i+1] - ai[i]; 3508 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 3509 /* off-diagonal portion of A */ 3510 for (j=jo; j<ncols_o; j++) { 3511 *ca++ = *ba++; bj++; 3512 } 3513 } 3514 } else { 3515 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3516 } 3517 3518 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3519 PetscFunctionReturn(0); 3520 } 3521 3522 static PetscEvent logkey_getlocalmatcondensed = 0; 3523 #undef __FUNCT__ 3524 #define __FUNCT__ "MatGetLocalMatCondensed" 3525 /*@C 3526 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 3527 3528 Not Collective 3529 3530 Input Parameters: 3531 + A - the matrix 3532 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3533 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 3534 3535 Output Parameter: 3536 . 
A_loc - the local sequential matrix generated 3537 3538 Level: developer 3539 3540 @*/ 3541 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 3542 { 3543 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3544 PetscErrorCode ierr; 3545 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 3546 IS isrowa,iscola; 3547 Mat *aloc; 3548 3549 PetscFunctionBegin; 3550 if (!logkey_getlocalmatcondensed) { 3551 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE); 3552 } 3553 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3554 if (!row){ 3555 start = A->rmap.rstart; end = A->rmap.rend; 3556 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 3557 } else { 3558 isrowa = *row; 3559 } 3560 if (!col){ 3561 start = A->cmap.rstart; 3562 cmap = a->garray; 3563 nzA = a->A->cmap.n; 3564 nzB = a->B->cmap.n; 3565 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3566 ncols = 0; 3567 for (i=0; i<nzB; i++) { 3568 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3569 else break; 3570 } 3571 imark = i; 3572 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 3573 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 3574 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 3575 ierr = PetscFree(idx);CHKERRQ(ierr); 3576 } else { 3577 iscola = *col; 3578 } 3579 if (scall != MAT_INITIAL_MATRIX){ 3580 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 3581 aloc[0] = *A_loc; 3582 } 3583 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 3584 *A_loc = aloc[0]; 3585 ierr = PetscFree(aloc);CHKERRQ(ierr); 3586 if (!row){ 3587 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 3588 } 3589 if (!col){ 3590 ierr = ISDestroy(iscola);CHKERRQ(ierr); 3591 } 3592 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3593 PetscFunctionReturn(0); 3594 } 3595 3596 static PetscEvent logkey_GetBrowsOfAcols = 0; 3597 #undef __FUNCT__ 3598 #define __FUNCT__ "MatGetBrowsOfAcols" 3599 /*@C 3600 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 3601 3602 Collective on Mat 3603 3604 Input Parameters: 3605 + A,B - the matrices in mpiaij format 3606 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3607 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 3608 3609 Output Parameter: 3610 + rowb, colb - index sets of rows and columns of B to extract 3611 . 
brstart - row index of B_seq from which next B->rmap.n rows are taken from B's local rows 3612 - B_seq - the sequential matrix generated 3613 3614 Level: developer 3615 3616 @*/ 3617 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 3618 { 3619 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3620 PetscErrorCode ierr; 3621 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 3622 IS isrowb,iscolb; 3623 Mat *bseq; 3624 3625 PetscFunctionBegin; 3626 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 3627 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 3628 } 3629 if (!logkey_GetBrowsOfAcols) { 3630 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 3631 } 3632 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3633 3634 if (scall == MAT_INITIAL_MATRIX){ 3635 start = A->cmap.rstart; 3636 cmap = a->garray; 3637 nzA = a->A->cmap.n; 3638 nzB = a->B->cmap.n; 3639 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3640 ncols = 0; 3641 for (i=0; i<nzB; i++) { /* row < local row index */ 3642 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3643 else break; 3644 } 3645 imark = i; 3646 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 3647 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 3648 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 3649 ierr = PetscFree(idx);CHKERRQ(ierr); 3650 *brstart = imark; 3651 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr); 3652 } else { 3653 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 3654 isrowb = *rowb; iscolb = *colb; 3655 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 3656 bseq[0] = *B_seq; 3657 } 3658 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 3659 *B_seq = bseq[0]; 3660 ierr = PetscFree(bseq);CHKERRQ(ierr); 3661 if (!rowb){ 3662 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 3663 } else { 3664 *rowb = isrowb; 3665 } 3666 if (!colb){ 3667 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 3668 } else { 3669 *colb = iscolb; 3670 } 3671 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3672 PetscFunctionReturn(0); 3673 } 3674 3675 static PetscEvent logkey_GetBrowsOfAocols = 0; 3676 #undef __FUNCT__ 3677 #define __FUNCT__ "MatGetBrowsOfAoCols" 3678 /*@C 3679 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 3680 of the OFF-DIAGONAL portion of local A 3681 3682 Collective on Mat 3683 3684 Input Parameters: 3685 + A,B - the matrices in mpiaij format 3686 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3687 . 
startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 3688 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 3689 3690 Output Parameter: 3691 . B_oth - the sequential matrix generated 3692 3693 Level: developer 3694 3695 @*/ 3696 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 3697 { 3698 VecScatter_MPI_General *gen_to,*gen_from; 3699 PetscErrorCode ierr; 3700 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3701 Mat_SeqAIJ *b_oth; 3702 VecScatter ctx=a->Mvctx; 3703 MPI_Comm comm=ctx->comm; 3704 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank; 3705 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj; 3706 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 3707 PetscInt i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 3708 MPI_Request *rwaits,*swaits; 3709 MPI_Status *sstatus,rstatus; 3710 PetscInt *cols; 3711 PetscScalar *vals; 3712 PetscMPIInt j; 3713 3714 PetscFunctionBegin; 3715 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 3716 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 3717 } 3718 if (!logkey_GetBrowsOfAocols) { 3719 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE); 3720 } 3721 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3722 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3723 3724 gen_to = (VecScatter_MPI_General*)ctx->todata; 3725 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 3726 rvalues = gen_from->values; /* buffer into which the row lengths are received */ 3727 svalues = gen_to->values; /* buffer from which the row lengths are sent */ 3728 nrecvs = gen_from->n; 3729 nsends = gen_to->n; 3730 3731 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 3732 srow = gen_to->indices; /* local row index to be sent */ 3733 rstarts = gen_from->starts; 3734 sstarts = gen_to->starts; 3735 rprocs = gen_from->procs; 3736 sprocs = gen_to->procs; 3737 sstatus = gen_to->sstatus; 3738 3739 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 3740 if (scall == MAT_INITIAL_MATRIX){ 3741 /* i-array */ 3742 /*---------*/ 3743 /* post receives */ 3744 for (i=0; i<nrecvs; i++){ 3745 rowlen = (PetscInt*)rvalues + rstarts[i]; 3746 nrows = rstarts[i+1]-rstarts[i]; 3747 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3748 } 3749 3750 /* pack the outgoing message */ 3751 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 3752 rstartsj = sstartsj + nsends +1; 3753 sstartsj[0] = 0; rstartsj[0] = 0; 3754 len = 0; /* total length of j or a array to be sent */ 3755 k = 0; 3756 for (i=0; i<nsends; i++){ 3757 rowlen = (PetscInt*)svalues + sstarts[i]; 3758 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3759 for (j=0; j<nrows; j++) { 3760 row = srow[k] + B->rmap.range[rank]; /* global row idx */ 3761 ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 3762 len += rowlen[j]; 3763 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 3764 k++; 3765 } 3766 ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3767 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 3768 } 3769 /* recvs and sends of i-array are completed
*/ 3770 i = nrecvs; 3771 while (i--) { 3772 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3773 } 3774 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3775 /* allocate buffers for sending j and a arrays */ 3776 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 3777 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 3778 3779 /* create i-array of B_oth */ 3780 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 3781 b_othi[0] = 0; 3782 len = 0; /* total length of j or a array to be received */ 3783 k = 0; 3784 for (i=0; i<nrecvs; i++){ 3785 rowlen = (PetscInt*)rvalues + rstarts[i]; 3786 nrows = rstarts[i+1]-rstarts[i]; 3787 for (j=0; j<nrows; j++) { 3788 b_othi[k+1] = b_othi[k] + rowlen[j]; 3789 len += rowlen[j]; k++; 3790 } 3791 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 3792 } 3793 3794 /* allocate space for j and a arrays of B_oth */ 3795 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 3796 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr); 3797 3798 /* j-array */ 3799 /*---------*/ 3800 /* post receives of j-array */ 3801 for (i=0; i<nrecvs; i++){ 3802 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3803 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3804 } 3805 k = 0; 3806 for (i=0; i<nsends; i++){ 3807 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3808 bufJ = bufj+sstartsj[i]; 3809 for (j=0; j<nrows; j++) { 3810 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 3811 ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3812 for (l=0; l<ncols; l++){ 3813 *bufJ++ = cols[l]; 3814 } 3815 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3816 } 3817 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3818 } 3819 3820 /* recvs and sends of j-array are completed */ 3821 i = nrecvs; 3822 while (i--) { 3823 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3824 } 3825 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3826 } else if (scall == MAT_REUSE_MATRIX){ 3827 sstartsj = *startsj; 3828 rstartsj = sstartsj + nsends +1; 3829 bufa = *bufa_ptr; 3830 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 3831 b_otha = b_oth->a; 3832 } else { 3833 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3834 } 3835 3836 /* a-array */ 3837 /*---------*/ 3838 /* post receives of a-array */ 3839 for (i=0; i<nrecvs; i++){ 3840 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3841 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3842 } 3843 k = 0; 3844 for (i=0; i<nsends; i++){ 3845 nrows = sstarts[i+1]-sstarts[i]; 3846 bufA = bufa+sstartsj[i]; 3847 for (j=0; j<nrows; j++) { 3848 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 3849 ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3850 for (l=0; l<ncols; l++){ 3851 *bufA++ = vals[l]; 3852 } 3853 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3854 3855 } 3856 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3857 } 3858 /* recvs and sends of a-array are completed */ 3859 i = nrecvs; 3860 while (i--) { 3861 ierr =
MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3862 } 3863 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3864 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 3865 3866 if (scall == MAT_INITIAL_MATRIX){ 3867 /* put together the new matrix */ 3868 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 3869 3870 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3871 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3872 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 3873 b_oth->freedata = PETSC_TRUE; 3874 b_oth->nonew = 0; 3875 3876 ierr = PetscFree(bufj);CHKERRQ(ierr); 3877 if (!startsj || !bufa_ptr){ 3878 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 3879 ierr = PetscFree(bufa);CHKERRQ(ierr); 3880 } else { 3881 *startsj = sstartsj; 3882 *bufa_ptr = bufa; 3883 } 3884 } 3885 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3886 3887 PetscFunctionReturn(0); 3888 } 3889 3890 #undef __FUNCT__ 3891 #define __FUNCT__ "MatGetCommunicationStructs" 3892 /*@C 3893 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 3894 3895 Not Collective 3896 3897 Input Parameter: 3898 . A - The matrix in mpiaij format 3899 3900 Output Parameters: 3901 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 3902 . colmap - A map from global column index to local index into lvec 3903 - multScatter - A scatter from the argument of a matrix-vector product to lvec 3904 3905 Level: developer 3906 3907 @*/ 3908 #if defined (PETSC_USE_CTABLE) 3909 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 3910 #else 3911 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 3912 #endif 3913 { 3914 Mat_MPIAIJ *a; 3915 3916 PetscFunctionBegin; 3917 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 3918 PetscValidPointer(lvec, 2); 3919 PetscValidPointer(colmap, 3); 3920 PetscValidPointer(multScatter, 4); 3921 a = (Mat_MPIAIJ *) A->data; 3922 if (lvec) *lvec = a->lvec; 3923 if (colmap) *colmap = a->colmap; 3924 if (multScatter) *multScatter = a->Mvctx; 3925 PetscFunctionReturn(0); 3926 } 3927 3928 /*MC 3929 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. (A brief creation and assembly sketch is included as a comment at the end of this file.) 3930 3931 Options Database Keys: 3932 .
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 3933 3934 Level: beginner 3935 3936 .seealso: MatCreateMPIAIJ 3937 M*/ 3938 3939 EXTERN_C_BEGIN 3940 #undef __FUNCT__ 3941 #define __FUNCT__ "MatCreate_MPIAIJ" 3942 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 3943 { 3944 Mat_MPIAIJ *b; 3945 PetscErrorCode ierr; 3946 PetscMPIInt size; 3947 3948 PetscFunctionBegin; 3949 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 3950 3951 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 3952 B->data = (void*)b; 3953 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3954 B->factor = 0; 3955 B->rmap.bs = 1; 3956 B->assembled = PETSC_FALSE; 3957 B->mapping = 0; 3958 3959 B->insertmode = NOT_SET_VALUES; 3960 b->size = size; 3961 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 3962 3963 /* build cache for off array entries formed */ 3964 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 3965 b->donotstash = PETSC_FALSE; 3966 b->colmap = 0; 3967 b->garray = 0; 3968 b->roworiented = PETSC_TRUE; 3969 3970 /* stuff used for matrix vector multiply */ 3971 b->lvec = PETSC_NULL; 3972 b->Mvctx = PETSC_NULL; 3973 3974 /* stuff for MatGetRow() */ 3975 b->rowindices = 0; 3976 b->rowvalues = 0; 3977 b->getrowactive = PETSC_FALSE; 3978 3979 3980 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3981 "MatStoreValues_MPIAIJ", 3982 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 3983 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3984 "MatRetrieveValues_MPIAIJ", 3985 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 3986 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3987 "MatGetDiagonalBlock_MPIAIJ", 3988 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 3989 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3990 "MatIsTranspose_MPIAIJ", 3991 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 3992 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 3993 "MatMPIAIJSetPreallocation_MPIAIJ", 3994 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 3995 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 3996 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 3997 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 3998 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3999 "MatDiagonalScaleLocal_MPIAIJ", 4000 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 4001 PetscFunctionReturn(0); 4002 } 4003 EXTERN_C_END 4004 4005 /* 4006 Special version for direct calls from Fortran 4007 */ 4008 #if defined(PETSC_HAVE_FORTRAN_CAPS) 4009 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 4010 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 4011 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 4012 #endif 4013 4014 /* Change these macros so can be used in void function */ 4015 #undef CHKERRQ 4016 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr) 4017 #undef SETERRQ2 4018 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr) 4019 #undef SETERRQ 4020 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr) 4021 4022 EXTERN_C_BEGIN 4023 #undef __FUNCT__ 4024 #define __FUNCT__ "matsetvaluesmpiaij_" 4025 void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv) 4026 { 4027 Mat mat = *mmat; 4028 PetscInt m = *mm, n = *mn; 4029 InsertMode addv = *maddv; 4030 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 4031 
PetscScalar value; 4032 PetscErrorCode ierr; 4033 4034 MatPreallocated(mat); 4035 if (mat->insertmode == NOT_SET_VALUES) { 4036 mat->insertmode = addv; 4037 } 4038 #if defined(PETSC_USE_DEBUG) 4039 else if (mat->insertmode != addv) { 4040 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 4041 } 4042 #endif 4043 { 4044 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 4045 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 4046 PetscTruth roworiented = aij->roworiented; 4047 4048 /* Some Variables required in the macro */ 4049 Mat A = aij->A; 4050 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 4051 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 4052 PetscScalar *aa = a->a; 4053 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 4054 Mat B = aij->B; 4055 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 4056 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n; 4057 PetscScalar *ba = b->a; 4058 4059 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 4060 PetscInt nonew = a->nonew; 4061 PetscScalar *ap1,*ap2; 4062 4063 PetscFunctionBegin; 4064 for (i=0; i<m; i++) { 4065 if (im[i] < 0) continue; 4066 #if defined(PETSC_USE_DEBUG) 4067 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 4068 #endif 4069 if (im[i] >= rstart && im[i] < rend) { 4070 row = im[i] - rstart; 4071 lastcol1 = -1; 4072 rp1 = aj + ai[row]; 4073 ap1 = aa + ai[row]; 4074 rmax1 = aimax[row]; 4075 nrow1 = ailen[row]; 4076 low1 = 0; 4077 high1 = nrow1; 4078 lastcol2 = -1; 4079 rp2 = bj + bi[row]; 4080 ap2 = ba + bi[row]; 4081 rmax2 = bimax[row]; 4082 nrow2 = bilen[row]; 4083 low2 = 0; 4084 high2 = nrow2; 4085 4086 for (j=0; j<n; j++) { 4087 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 4088 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 4089 if (in[j] >= cstart && in[j] < cend){ 4090 col = in[j] - cstart; 4091 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 4092 } else if (in[j] < 0) continue; 4093 #if defined(PETSC_USE_DEBUG) 4094 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);} 4095 #endif 4096 else { 4097 if (mat->was_assembled) { 4098 if (!aij->colmap) { 4099 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 4100 } 4101 #if defined (PETSC_USE_CTABLE) 4102 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4103 col--; 4104 #else 4105 col = aij->colmap[in[j]] - 1; 4106 #endif 4107 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 4108 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 4109 col = in[j]; 4110 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 4111 B = aij->B; 4112 b = (Mat_SeqAIJ*)B->data; 4113 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 4114 rp2 = bj + bi[row]; 4115 ap2 = ba + bi[row]; 4116 rmax2 = bimax[row]; 4117 nrow2 = bilen[row]; 4118 low2 = 0; 4119 high2 = nrow2; 4120 bm = aij->B->rmap.n; 4121 ba = b->a; 4122 } 4123 } else col = in[j]; 4124 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 4125 } 4126 } 4127 } else { 4128 if (!aij->donotstash) { 4129 if (roworiented) { 4130 if (ignorezeroentries && v[i*n] == 0.0) continue; 4131 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 4132 } else { 4133 if (ignorezeroentries && v[i] == 0.0) continue; 4134 ierr = 
MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 4135 } 4136 } 4137 } 4138 }} 4139 PetscFunctionReturnVoid(); 4140 } 4141 EXTERN_C_END 4142
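/*
   Usage sketch for the MATMPIAIJ type defined in this file (illustration only, not part of
   the library code): create, preallocate, fill, and assemble a parallel AIJ matrix. The
   global size 8x8, the preallocation counts 3 and 2, and the single entry set below are
   arbitrary placeholder values.

      Mat            A;
      PetscInt       row = 0,col = 0;
      PetscScalar    v = 1.0;
      PetscErrorCode ierr;

      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,8,8);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,3,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,1,&col,&v,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatDestroy(A);CHKERRQ(ierr);
*/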