1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 4 #include "src/inline/spops.h" 5 6 /* 7 Local utility routine that creates a mapping from the global column 8 number to the local number in the off-diagonal part of the local 9 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 10 a slightly higher hash table cost; without it it is not scalable (each processor 11 has an order N integer array but is fast to acess. 12 */ 13 #undef __FUNCT__ 14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 16 { 17 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 18 PetscErrorCode ierr; 19 PetscInt n = aij->B->cmap.n,i; 20 21 PetscFunctionBegin; 22 #if defined (PETSC_USE_CTABLE) 23 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 24 for (i=0; i<n; i++){ 25 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 26 } 27 #else 28 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 29 ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr); 30 ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr); 31 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 32 #endif 33 PetscFunctionReturn(0); 34 } 35 36 37 #define CHUNKSIZE 15 38 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 39 { \ 40 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 41 lastcol1 = col;\ 42 while (high1-low1 > 5) { \ 43 t = (low1+high1)/2; \ 44 if (rp1[t] > col) high1 = t; \ 45 else low1 = t; \ 46 } \ 47 for (_i=low1; _i<high1; _i++) { \ 48 if (rp1[_i] > col) break; \ 49 if (rp1[_i] == col) { \ 50 if (addv == ADD_VALUES) ap1[_i] += value; \ 51 else ap1[_i] = value; \ 52 goto a_noinsert; \ 53 } \ 54 } \ 55 if (value == 0.0 && ignorezeroentries) goto a_noinsert; \ 56 if (nonew == 1) goto a_noinsert; \ 57 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, 
col); \ 58 MatSeqXAIJReallocateAIJ(a,1,nrow1,row,col,rmax1,aa,ai,aj,am,rp1,ap1,aimax,nonew); \ 59 N = nrow1++ - 1; a->nz++; high1++; \ 60 /* shift up all the later entries in this row */ \ 61 for (ii=N; ii>=_i; ii--) { \ 62 rp1[ii+1] = rp1[ii]; \ 63 ap1[ii+1] = ap1[ii]; \ 64 } \ 65 rp1[_i] = col; \ 66 ap1[_i] = value; \ 67 a_noinsert: ; \ 68 ailen[row] = nrow1; \ 69 } 70 71 72 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 73 { \ 74 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 75 lastcol2 = col;\ 76 while (high2-low2 > 5) { \ 77 t = (low2+high2)/2; \ 78 if (rp2[t] > col) high2 = t; \ 79 else low2 = t; \ 80 } \ 81 for (_i=low2; _i<high2; _i++) { \ 82 if (rp2[_i] > col) break; \ 83 if (rp2[_i] == col) { \ 84 if (addv == ADD_VALUES) ap2[_i] += value; \ 85 else ap2[_i] = value; \ 86 goto b_noinsert; \ 87 } \ 88 } \ 89 if (value == 0.0 && ignorezeroentries) goto b_noinsert; \ 90 if (nonew == 1) goto b_noinsert; \ 91 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 92 MatSeqXAIJReallocateAIJ(b,1,nrow2,row,col,rmax2,ba,bi,bj,bm,rp2,ap2,bimax,nonew); \ 93 N = nrow2++ - 1; b->nz++; high2++;\ 94 /* shift up all the later entries in this row */ \ 95 for (ii=N; ii>=_i; ii--) { \ 96 rp2[ii+1] = rp2[ii]; \ 97 ap2[ii+1] = ap2[ii]; \ 98 } \ 99 rp2[_i] = col; \ 100 ap2[_i] = value; \ 101 b_noinsert: ; \ 102 bilen[row] = nrow2; \ 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 107 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 108 { 109 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 110 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 111 PetscErrorCode ierr; 112 PetscInt l,*garray = mat->garray,diag; 113 114 PetscFunctionBegin; 115 /* code only works for square matrices A */ 116 117 /* find size of row to the left of the diagonal part */ 118 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 119 row = row - diag; 120 
for (l=0; l<b->i[row+1]-b->i[row]; l++) { 121 if (garray[b->j[b->i[row]+l]] > diag) break; 122 } 123 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 124 125 /* diagonal part */ 126 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 127 128 /* right of diagonal part */ 129 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 130 PetscFunctionReturn(0); 131 } 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "MatSetValues_MPIAIJ" 135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 136 { 137 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 138 PetscScalar value; 139 PetscErrorCode ierr; 140 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 141 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 142 PetscTruth roworiented = aij->roworiented; 143 144 /* Some Variables required in the macro */ 145 Mat A = aij->A; 146 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 147 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 148 PetscScalar *aa = a->a; 149 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 150 Mat B = aij->B; 151 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 152 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n; 153 PetscScalar *ba = b->a; 154 155 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 156 PetscInt nonew = a->nonew; 157 PetscScalar *ap1,*ap2; 158 159 PetscFunctionBegin; 160 for (i=0; i<m; i++) { 161 if (im[i] < 0) continue; 162 #if defined(PETSC_USE_DEBUG) 163 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 164 #endif 165 if (im[i] >= rstart && im[i] < rend) { 166 row = im[i] - rstart; 167 
lastcol1 = -1; 168 rp1 = aj + ai[row]; 169 ap1 = aa + ai[row]; 170 rmax1 = aimax[row]; 171 nrow1 = ailen[row]; 172 low1 = 0; 173 high1 = nrow1; 174 lastcol2 = -1; 175 rp2 = bj + bi[row]; 176 ap2 = ba + bi[row]; 177 rmax2 = bimax[row]; 178 nrow2 = bilen[row]; 179 low2 = 0; 180 high2 = nrow2; 181 182 for (j=0; j<n; j++) { 183 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 184 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 185 if (in[j] >= cstart && in[j] < cend){ 186 col = in[j] - cstart; 187 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 188 } else if (in[j] < 0) continue; 189 #if defined(PETSC_USE_DEBUG) 190 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);} 191 #endif 192 else { 193 if (mat->was_assembled) { 194 if (!aij->colmap) { 195 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 196 } 197 #if defined (PETSC_USE_CTABLE) 198 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 199 col--; 200 #else 201 col = aij->colmap[in[j]] - 1; 202 #endif 203 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 204 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 205 col = in[j]; 206 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 207 B = aij->B; 208 b = (Mat_SeqAIJ*)B->data; 209 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 210 rp2 = bj + bi[row]; 211 ap2 = ba + bi[row]; 212 rmax2 = bimax[row]; 213 nrow2 = bilen[row]; 214 low2 = 0; 215 high2 = nrow2; 216 bm = aij->B->rmap.n; 217 ba = b->a; 218 } 219 } else col = in[j]; 220 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 221 } 222 } 223 } else { 224 if (!aij->donotstash) { 225 if (roworiented) { 226 if (ignorezeroentries && v[i*n] == 0.0) continue; 227 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 228 } else { 229 if (ignorezeroentries && v[i] == 0.0) continue; 230 ierr = 
MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 231 } 232 } 233 } 234 } 235 PetscFunctionReturn(0); 236 } 237 238 239 #undef __FUNCT__ 240 #define __FUNCT__ "MatGetValues_MPIAIJ" 241 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 242 { 243 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 244 PetscErrorCode ierr; 245 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 246 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 247 248 PetscFunctionBegin; 249 for (i=0; i<m; i++) { 250 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 251 if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1); 252 if (idxm[i] >= rstart && idxm[i] < rend) { 253 row = idxm[i] - rstart; 254 for (j=0; j<n; j++) { 255 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 256 if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1); 257 if (idxn[j] >= cstart && idxn[j] < cend){ 258 col = idxn[j] - cstart; 259 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 260 } else { 261 if (!aij->colmap) { 262 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 263 } 264 #if defined (PETSC_USE_CTABLE) 265 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 266 col --; 267 #else 268 col = aij->colmap[idxn[j]] - 1; 269 #endif 270 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 271 else { 272 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 273 } 274 } 275 } 276 } else { 277 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 278 } 279 } 280 PetscFunctionReturn(0); 281 } 282 283 #undef __FUNCT__ 284 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 285 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 286 { 287 Mat_MPIAIJ 
*aij = (Mat_MPIAIJ*)mat->data; 288 PetscErrorCode ierr; 289 PetscInt nstash,reallocs; 290 InsertMode addv; 291 292 PetscFunctionBegin; 293 if (aij->donotstash) { 294 PetscFunctionReturn(0); 295 } 296 297 /* make sure all processors are either in INSERTMODE or ADDMODE */ 298 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 299 if (addv == (ADD_VALUES|INSERT_VALUES)) { 300 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 301 } 302 mat->insertmode = addv; /* in case this processor had no cache */ 303 304 ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr); 305 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 306 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 307 PetscFunctionReturn(0); 308 } 309 310 #undef __FUNCT__ 311 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 312 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 313 { 314 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 315 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 316 PetscErrorCode ierr; 317 PetscMPIInt n; 318 PetscInt i,j,rstart,ncols,flg; 319 PetscInt *row,*col,other_disassembled; 320 PetscScalar *val; 321 InsertMode addv = mat->insertmode; 322 323 PetscFunctionBegin; 324 if (!aij->donotstash) { 325 while (1) { 326 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 327 if (!flg) break; 328 329 for (i=0; i<n;) { 330 /* Now identify the consecutive vals belonging to the same row */ 331 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 332 if (j < n) ncols = j-i; 333 else ncols = n-i; 334 /* Now assemble all these values with a single function call */ 335 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 336 i = j; 337 } 338 } 339 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 340 } 341 a->compressedrow.use = 
PETSC_FALSE; 342 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 343 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 344 345 /* determine if any processor has disassembled, if so we must 346 also disassemble ourselfs, in order that we may reassemble. */ 347 /* 348 if nonzero structure of submatrix B cannot change then we know that 349 no processor disassembled thus we can skip this stuff 350 */ 351 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 352 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 353 if (mat->was_assembled && !other_disassembled) { 354 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 355 } 356 } 357 /* reaccess the b because aij->B was changed in MatSetValues() or DisAssemble() */ 358 b = (Mat_SeqAIJ *)aij->B->data; 359 360 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 361 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 362 } 363 ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr); 364 b->compressedrow.use = PETSC_TRUE; 365 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 366 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 367 368 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 369 aij->rowvalues = 0; 370 371 /* used by MatAXPY() */ 372 a->xtoy = 0; b->xtoy = 0; 373 a->XtoY = 0; b->XtoY = 0; 374 375 PetscFunctionReturn(0); 376 } 377 378 #undef __FUNCT__ 379 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 380 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 381 { 382 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 383 PetscErrorCode ierr; 384 385 PetscFunctionBegin; 386 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 387 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 388 PetscFunctionReturn(0); 389 } 390 391 #undef __FUNCT__ 392 #define __FUNCT__ "MatZeroRows_MPIAIJ" 393 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 394 { 395 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 396 PetscErrorCode ierr; 397 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = 
A->tag,lastidx = -1; 398 PetscInt i,*owners = A->rmap.range; 399 PetscInt *nprocs,j,idx,nsends,row; 400 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 401 PetscInt *rvalues,count,base,slen,*source; 402 PetscInt *lens,*lrows,*values,rstart=A->rmap.rstart; 403 MPI_Comm comm = A->comm; 404 MPI_Request *send_waits,*recv_waits; 405 MPI_Status recv_status,*send_status; 406 #if defined(PETSC_DEBUG) 407 PetscTruth found = PETSC_FALSE; 408 #endif 409 410 PetscFunctionBegin; 411 /* first count number of contributors to each processor */ 412 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 413 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 414 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 415 j = 0; 416 for (i=0; i<N; i++) { 417 if (lastidx > (idx = rows[i])) j = 0; 418 lastidx = idx; 419 for (; j<size; j++) { 420 if (idx >= owners[j] && idx < owners[j+1]) { 421 nprocs[2*j]++; 422 nprocs[2*j+1] = 1; 423 owner[i] = j; 424 #if defined(PETSC_DEBUG) 425 found = PETSC_TRUE; 426 #endif 427 break; 428 } 429 } 430 #if defined(PETSC_DEBUG) 431 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 432 found = PETSC_FALSE; 433 #endif 434 } 435 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 436 437 /* inform other processors of number of messages and max length*/ 438 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 439 440 /* post receives: */ 441 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 442 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 443 for (i=0; i<nrecvs; i++) { 444 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 445 } 446 447 /* do sends: 448 1) starts[i] gives the starting index in svalues for stuff going to 449 the ith processor 450 */ 451 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 452 ierr = 
PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 453 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 454 starts[0] = 0; 455 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 456 for (i=0; i<N; i++) { 457 svalues[starts[owner[i]]++] = rows[i]; 458 } 459 460 starts[0] = 0; 461 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 462 count = 0; 463 for (i=0; i<size; i++) { 464 if (nprocs[2*i+1]) { 465 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 466 } 467 } 468 ierr = PetscFree(starts);CHKERRQ(ierr); 469 470 base = owners[rank]; 471 472 /* wait on receives */ 473 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 474 source = lens + nrecvs; 475 count = nrecvs; slen = 0; 476 while (count) { 477 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 478 /* unpack receives into our local space */ 479 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 480 source[imdex] = recv_status.MPI_SOURCE; 481 lens[imdex] = n; 482 slen += n; 483 count--; 484 } 485 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 486 487 /* move the data into the send scatter */ 488 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 489 count = 0; 490 for (i=0; i<nrecvs; i++) { 491 values = rvalues + i*nmax; 492 for (j=0; j<lens[i]; j++) { 493 lrows[count++] = values[j] - base; 494 } 495 } 496 ierr = PetscFree(rvalues);CHKERRQ(ierr); 497 ierr = PetscFree(lens);CHKERRQ(ierr); 498 ierr = PetscFree(owner);CHKERRQ(ierr); 499 ierr = PetscFree(nprocs);CHKERRQ(ierr); 500 501 /* actually zap the local rows */ 502 /* 503 Zero the required rows. If the "diagonal block" of the matrix 504 is square and the user wishes to set the diagonal we use separate 505 code so that MatSetValues() is not called for each diagonal allocating 506 new memory, thus calling lots of mallocs and slowing things down. 
507 508 Contributed by: Matthew Knepley 509 */ 510 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 511 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 512 if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) { 513 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 514 } else if (diag != 0.0) { 515 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 516 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 517 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 518 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 519 } 520 for (i = 0; i < slen; i++) { 521 row = lrows[i] + rstart; 522 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 523 } 524 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 525 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 526 } else { 527 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 528 } 529 ierr = PetscFree(lrows);CHKERRQ(ierr); 530 531 /* wait on sends */ 532 if (nsends) { 533 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 534 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 535 ierr = PetscFree(send_status);CHKERRQ(ierr); 536 } 537 ierr = PetscFree(send_waits);CHKERRQ(ierr); 538 ierr = PetscFree(svalues);CHKERRQ(ierr); 539 540 PetscFunctionReturn(0); 541 } 542 543 #undef __FUNCT__ 544 #define __FUNCT__ "MatMult_MPIAIJ" 545 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 546 { 547 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 548 PetscErrorCode ierr; 549 PetscInt nt; 550 551 PetscFunctionBegin; 552 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 553 if (nt != A->cmap.n) { 554 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt); 555 } 556 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 557 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 558 ierr 
= VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 559 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 560 PetscFunctionReturn(0); 561 } 562 563 #undef __FUNCT__ 564 #define __FUNCT__ "MatMultAdd_MPIAIJ" 565 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 566 { 567 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 568 PetscErrorCode ierr; 569 570 PetscFunctionBegin; 571 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 572 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 573 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 574 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 575 PetscFunctionReturn(0); 576 } 577 578 #undef __FUNCT__ 579 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 580 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 581 { 582 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 583 PetscErrorCode ierr; 584 PetscTruth merged; 585 586 PetscFunctionBegin; 587 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 588 /* do nondiagonal part */ 589 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 590 if (!merged) { 591 /* send it on its way */ 592 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 593 /* do local part */ 594 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 595 /* receive remote parts: note this assumes the values are not actually */ 596 /* added in yy until the next line, */ 597 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 598 } else { 599 /* do local part */ 600 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 601 /* send it on its way */ 602 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 603 /* values actually were received in the Begin() but we need to call this nop */ 604 ierr = 
VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 605 } 606 PetscFunctionReturn(0); 607 } 608 609 EXTERN_C_BEGIN 610 #undef __FUNCT__ 611 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 612 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 613 { 614 MPI_Comm comm; 615 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 616 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 617 IS Me,Notme; 618 PetscErrorCode ierr; 619 PetscInt M,N,first,last,*notme,i; 620 PetscMPIInt size; 621 622 PetscFunctionBegin; 623 624 /* Easy test: symmetric diagonal block */ 625 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 626 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 627 if (!*f) PetscFunctionReturn(0); 628 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 629 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 630 if (size == 1) PetscFunctionReturn(0); 631 632 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. 
*/ 633 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 634 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 635 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 636 for (i=0; i<first; i++) notme[i] = i; 637 for (i=last; i<M; i++) notme[i-last+first] = i; 638 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 639 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 640 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 641 Aoff = Aoffs[0]; 642 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 643 Boff = Boffs[0]; 644 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 645 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 646 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 647 ierr = ISDestroy(Me);CHKERRQ(ierr); 648 ierr = ISDestroy(Notme);CHKERRQ(ierr); 649 650 PetscFunctionReturn(0); 651 } 652 EXTERN_C_END 653 654 #undef __FUNCT__ 655 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 656 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 657 { 658 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 659 PetscErrorCode ierr; 660 661 PetscFunctionBegin; 662 /* do nondiagonal part */ 663 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 664 /* send it on its way */ 665 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 666 /* do local part */ 667 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 668 /* receive remote parts */ 669 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 670 PetscFunctionReturn(0); 671 } 672 673 /* 674 This only works correctly for square matrices where the subblock A->A is the 675 diagonal block 676 */ 677 #undef __FUNCT__ 678 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 679 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 680 { 681 PetscErrorCode ierr; 682 Mat_MPIAIJ *a = 
(Mat_MPIAIJ*)A->data; 683 684 PetscFunctionBegin; 685 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 686 if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) { 687 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 688 } 689 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 690 PetscFunctionReturn(0); 691 } 692 693 #undef __FUNCT__ 694 #define __FUNCT__ "MatScale_MPIAIJ" 695 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 696 { 697 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 698 PetscErrorCode ierr; 699 700 PetscFunctionBegin; 701 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 702 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 703 PetscFunctionReturn(0); 704 } 705 706 #undef __FUNCT__ 707 #define __FUNCT__ "MatDestroy_MPIAIJ" 708 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 709 { 710 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 711 PetscErrorCode ierr; 712 713 PetscFunctionBegin; 714 #if defined(PETSC_USE_LOG) 715 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N); 716 #endif 717 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 718 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 719 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 720 #if defined (PETSC_USE_CTABLE) 721 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 722 #else 723 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 724 #endif 725 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 726 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 727 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 728 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 729 ierr = PetscFree(aij);CHKERRQ(ierr); 730 731 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 732 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 733 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 734 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 735 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 736 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 737 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 738 PetscFunctionReturn(0); 739 } 740 741 #undef __FUNCT__ 742 #define __FUNCT__ "MatView_MPIAIJ_Binary" 743 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 744 { 745 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 746 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 747 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 748 PetscErrorCode ierr; 749 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 750 int fd; 751 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 752 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz; 753 PetscScalar *column_values; 754 755 PetscFunctionBegin; 756 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 757 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 758 nz = A->nz + B->nz; 759 if (!rank) { 760 header[0] = MAT_FILE_COOKIE; 761 header[1] = mat->rmap.N; 762 header[2] = mat->cmap.N; 763 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 764 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 765 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 766 /* get largest number of rows any processor has */ 767 rlen = mat->rmap.n; 768 range = mat->rmap.range; 769 for (i=1; i<size; i++) { 770 rlen = PetscMax(rlen,range[i+1] - range[i]); 771 } 772 } else { 773 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 774 rlen = mat->rmap.n; 775 } 776 777 /* load up the local row 
counts */ 778 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 779 for (i=0; i<mat->rmap.n; i++) { 780 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 781 } 782 783 /* store the row lengths to the file */ 784 if (!rank) { 785 MPI_Status status; 786 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 787 for (i=1; i<size; i++) { 788 rlen = range[i+1] - range[i]; 789 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 790 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 791 } 792 } else { 793 ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 794 } 795 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 796 797 /* load up the local column indices */ 798 nzmax = nz; /* )th processor needs space a largest processor needs */ 799 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 800 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 801 cnt = 0; 802 for (i=0; i<mat->rmap.n; i++) { 803 for (j=B->i[i]; j<B->i[i+1]; j++) { 804 if ( (col = garray[B->j[j]]) > cstart) break; 805 column_indices[cnt++] = col; 806 } 807 for (k=A->i[i]; k<A->i[i+1]; k++) { 808 column_indices[cnt++] = A->j[k] + cstart; 809 } 810 for (; j<B->i[i+1]; j++) { 811 column_indices[cnt++] = garray[B->j[j]]; 812 } 813 } 814 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 815 816 /* store the column indices to the file */ 817 if (!rank) { 818 MPI_Status status; 819 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 820 for (i=1; i<size; i++) { 821 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 822 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 823 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 824 ierr = 
PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 825 } 826 } else { 827 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 828 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 829 } 830 ierr = PetscFree(column_indices);CHKERRQ(ierr); 831 832 /* load up the local column values */ 833 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 834 cnt = 0; 835 for (i=0; i<mat->rmap.n; i++) { 836 for (j=B->i[i]; j<B->i[i+1]; j++) { 837 if ( garray[B->j[j]] > cstart) break; 838 column_values[cnt++] = B->a[j]; 839 } 840 for (k=A->i[i]; k<A->i[i+1]; k++) { 841 column_values[cnt++] = A->a[k]; 842 } 843 for (; j<B->i[i+1]; j++) { 844 column_values[cnt++] = B->a[j]; 845 } 846 } 847 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 848 849 /* store the column values to the file */ 850 if (!rank) { 851 MPI_Status status; 852 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 853 for (i=1; i<size; i++) { 854 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 855 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 856 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 857 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 858 } 859 } else { 860 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 861 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 862 } 863 ierr = PetscFree(column_values);CHKERRQ(ierr); 864 PetscFunctionReturn(0); 865 } 866 867 #undef __FUNCT__ 868 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 869 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 870 { 871 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 872 PetscErrorCode ierr; 873 PetscMPIInt rank = aij->rank,size = aij->size; 874 
PetscTruth isdraw,iascii,isbinary; 875 PetscViewer sviewer; 876 PetscViewerFormat format; 877 878 PetscFunctionBegin; 879 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 880 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 881 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 882 if (iascii) { 883 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 884 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 885 MatInfo info; 886 PetscTruth inodes; 887 888 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 889 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 890 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 891 if (!inodes) { 892 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 893 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 894 } else { 895 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 896 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 897 } 898 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 899 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 900 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 901 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 902 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 903 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 904 PetscFunctionReturn(0); 905 } else if (format == PETSC_VIEWER_ASCII_INFO) { 906 PetscInt inodecount,inodelimit,*inodes; 907 ierr = 
MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 908 if (inodes) { 909 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 910 } else { 911 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 912 } 913 PetscFunctionReturn(0); 914 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 915 PetscFunctionReturn(0); 916 } 917 } else if (isbinary) { 918 if (size == 1) { 919 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 920 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 921 } else { 922 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 923 } 924 PetscFunctionReturn(0); 925 } else if (isdraw) { 926 PetscDraw draw; 927 PetscTruth isnull; 928 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 929 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 930 } 931 932 if (size == 1) { 933 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 934 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 935 } else { 936 /* assemble the entire matrix onto first processor. 
*/ 937 Mat A; 938 Mat_SeqAIJ *Aloc; 939 PetscInt M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct; 940 PetscScalar *a; 941 942 ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 943 if (!rank) { 944 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 945 } else { 946 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 947 } 948 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 949 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 950 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 951 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 952 953 /* copy over the A part */ 954 Aloc = (Mat_SeqAIJ*)aij->A->data; 955 m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 956 row = mat->rmap.rstart; 957 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;} 958 for (i=0; i<m; i++) { 959 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 960 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 961 } 962 aj = Aloc->j; 963 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;} 964 965 /* copy over the B part */ 966 Aloc = (Mat_SeqAIJ*)aij->B->data; 967 m = aij->B->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 968 row = mat->rmap.rstart; 969 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 970 ct = cols; 971 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 972 for (i=0; i<m; i++) { 973 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 974 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 975 } 976 ierr = PetscFree(ct);CHKERRQ(ierr); 977 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 978 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 979 /* 980 Everyone has to call to draw the matrix since the graphics waits are 981 synchronized across all processors that share the PetscDraw object 982 */ 983 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 984 if (!rank) { 985 ierr = 
PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
/*
   MatView_MPIAIJ - Viewer dispatch for MPIAIJ matrices.

   ASCII, draw, binary and socket viewers are all forwarded to
   MatView_MPIAIJ_ASCIIorDraworSocket(); any other viewer type raises
   PETSC_ERR_SUP.
*/
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  /* Identify which of the four supported viewer types this is */
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}



#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
/*
   MatRelax_MPIAIJ - "Local" SOR/relaxation for an MPIAIJ matrix.

   Each outer iteration scatters the current solution xx into the ghost
   vector lvec, forms bb1 = bb - B*xx (B is the off-diagonal block), and then
   runs a purely local sweep on the diagonal block A with rhs bb1.  Only the
   SOR_LOCAL_* variants are supported; true parallel SOR raises PETSC_ERR_SUP.

   Input:  bb  - right hand side;  omega, fshift - relaxation parameters;
           flag - which local sweep to perform; its/lits - outer/inner counts.
   Output: xx  - updated approximate solution.
*/
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1;        /* work vector holding bb - B*xx */

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* With a zero initial guess the first sweep needs no ghost update.
         NOTE(review): the its/lits argument slots receive (lits,lits) here
         and (lits,PETSC_NULL) in the other branches -- looks inconsistent
         with the relax() signature (its,lits); confirm against the SeqAIJ
         relax implementation before changing. */
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      /* refresh ghost values of xx */
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
      CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
      CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
      CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
/*
   MatPermute_MPIAIJ - Produces B = P*A*Q for permutations given as index
   sets rowp (rows) and colp (columns); implemented via MatGetSubMatrix()
   on the gathered, inverted row permutation and a local inverted column
   permutation.
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       first,local_size,nrows,*rows;
  int            ntids;
  IS             crowp,growp,irowp,lrowp,lcolp,icolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr);
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    /* rowp lives on a different communicator: rebuild it on A's comm */
    ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr);
  }
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr);
  ierr = ISSetPermutation(growp); CHKERRQ(ierr);
  if (pcomm!=comm) {
    ierr = ISDestroy(crowp); CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  /* get the local target indices */
  ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr);
  ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr);
  ierr = ISDestroy(irowp); CHKERRQ(ierr);
  /* the column permutation is so much easier;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr);
  if (ntids==1) {
lcolp = colp; 1131 } else { 1132 ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr); 1133 ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr); 1134 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr); 1135 } 1136 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr); 1137 ierr = ISSetPermutation(lcolp); CHKERRQ(ierr); 1138 if (ntids>1) { 1139 ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr); 1140 ierr = ISDestroy(lcolp); CHKERRQ(ierr); 1141 } 1142 /* now we just get the submatrix */ 1143 ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr); 1144 /* clean up */ 1145 ierr = ISDestroy(lrowp); CHKERRQ(ierr); 1146 ierr = ISDestroy(icolp); CHKERRQ(ierr); 1147 PetscFunctionReturn(0); 1148 } 1149 1150 #undef __FUNCT__ 1151 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1152 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1153 { 1154 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1155 Mat A = mat->A,B = mat->B; 1156 PetscErrorCode ierr; 1157 PetscReal isend[5],irecv[5]; 1158 1159 PetscFunctionBegin; 1160 info->block_size = 1.0; 1161 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1162 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1163 isend[3] = info->memory; isend[4] = info->mallocs; 1164 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1165 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1166 isend[3] += info->memory; isend[4] += info->mallocs; 1167 if (flag == MAT_LOCAL) { 1168 info->nz_used = isend[0]; 1169 info->nz_allocated = isend[1]; 1170 info->nz_unneeded = isend[2]; 1171 info->memory = isend[3]; 1172 info->mallocs = isend[4]; 1173 } else if (flag == MAT_GLOBAL_MAX) { 1174 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1175 info->nz_used = irecv[0]; 1176 info->nz_allocated = irecv[1]; 1177 info->nz_unneeded = irecv[2]; 1178 info->memory = irecv[3]; 1179 info->mallocs = 
irecv[4]; 1180 } else if (flag == MAT_GLOBAL_SUM) { 1181 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1182 info->nz_used = irecv[0]; 1183 info->nz_allocated = irecv[1]; 1184 info->nz_unneeded = irecv[2]; 1185 info->memory = irecv[3]; 1186 info->mallocs = irecv[4]; 1187 } 1188 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1189 info->fill_ratio_needed = 0; 1190 info->factor_mallocs = 0; 1191 info->rows_global = (double)matin->rmap.N; 1192 info->columns_global = (double)matin->cmap.N; 1193 info->rows_local = (double)matin->rmap.n; 1194 info->columns_local = (double)matin->cmap.N; 1195 1196 PetscFunctionReturn(0); 1197 } 1198 1199 #undef __FUNCT__ 1200 #define __FUNCT__ "MatSetOption_MPIAIJ" 1201 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op) 1202 { 1203 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1204 PetscErrorCode ierr; 1205 1206 PetscFunctionBegin; 1207 switch (op) { 1208 case MAT_NO_NEW_NONZERO_LOCATIONS: 1209 case MAT_YES_NEW_NONZERO_LOCATIONS: 1210 case MAT_COLUMNS_UNSORTED: 1211 case MAT_COLUMNS_SORTED: 1212 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1213 case MAT_KEEP_ZEROED_ROWS: 1214 case MAT_NEW_NONZERO_LOCATION_ERR: 1215 case MAT_USE_INODES: 1216 case MAT_DO_NOT_USE_INODES: 1217 case MAT_IGNORE_ZERO_ENTRIES: 1218 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1219 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1220 break; 1221 case MAT_ROW_ORIENTED: 1222 a->roworiented = PETSC_TRUE; 1223 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1224 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1225 break; 1226 case MAT_ROWS_SORTED: 1227 case MAT_ROWS_UNSORTED: 1228 case MAT_YES_NEW_DIAGONALS: 1229 ierr = PetscInfo(A,"Option ignored\n");CHKERRQ(ierr); 1230 break; 1231 case MAT_COLUMN_ORIENTED: 1232 a->roworiented = PETSC_FALSE; 1233 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1234 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1235 break; 1236 case MAT_IGNORE_OFF_PROC_ENTRIES: 1237 a->donotstash = PETSC_TRUE; 1238 break; 1239 case 
MAT_NO_NEW_DIAGONALS: 1240 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1241 case MAT_SYMMETRIC: 1242 case MAT_STRUCTURALLY_SYMMETRIC: 1243 case MAT_HERMITIAN: 1244 case MAT_SYMMETRY_ETERNAL: 1245 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1246 break; 1247 case MAT_NOT_SYMMETRIC: 1248 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1249 case MAT_NOT_HERMITIAN: 1250 case MAT_NOT_SYMMETRY_ETERNAL: 1251 break; 1252 default: 1253 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1254 } 1255 PetscFunctionReturn(0); 1256 } 1257 1258 #undef __FUNCT__ 1259 #define __FUNCT__ "MatGetRow_MPIAIJ" 1260 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1261 { 1262 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1263 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1264 PetscErrorCode ierr; 1265 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart; 1266 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend; 1267 PetscInt *cmap,*idx_p; 1268 1269 PetscFunctionBegin; 1270 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1271 mat->getrowactive = PETSC_TRUE; 1272 1273 if (!mat->rowvalues && (idx || v)) { 1274 /* 1275 allocate enough space to hold information from the longest row. 
1276 */ 1277 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1278 PetscInt max = 1,tmp; 1279 for (i=0; i<matin->rmap.n; i++) { 1280 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1281 if (max < tmp) { max = tmp; } 1282 } 1283 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1284 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1285 } 1286 1287 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1288 lrow = row - rstart; 1289 1290 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1291 if (!v) {pvA = 0; pvB = 0;} 1292 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1293 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1294 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1295 nztot = nzA + nzB; 1296 1297 cmap = mat->garray; 1298 if (v || idx) { 1299 if (nztot) { 1300 /* Sort by increasing column numbers, assuming A and B already sorted */ 1301 PetscInt imark = -1; 1302 if (v) { 1303 *v = v_p = mat->rowvalues; 1304 for (i=0; i<nzB; i++) { 1305 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1306 else break; 1307 } 1308 imark = i; 1309 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1310 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1311 } 1312 if (idx) { 1313 *idx = idx_p = mat->rowindices; 1314 if (imark > -1) { 1315 for (i=0; i<imark; i++) { 1316 idx_p[i] = cmap[cworkB[i]]; 1317 } 1318 } else { 1319 for (i=0; i<nzB; i++) { 1320 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1321 else break; 1322 } 1323 imark = i; 1324 } 1325 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1326 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1327 } 1328 } else { 1329 if (idx) *idx = 0; 1330 if (v) *v = 0; 1331 } 1332 } 1333 *nz = nztot; 1334 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1335 ierr = 
(*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1336 PetscFunctionReturn(0); 1337 } 1338 1339 #undef __FUNCT__ 1340 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1341 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1342 { 1343 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1344 1345 PetscFunctionBegin; 1346 if (!aij->getrowactive) { 1347 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1348 } 1349 aij->getrowactive = PETSC_FALSE; 1350 PetscFunctionReturn(0); 1351 } 1352 1353 #undef __FUNCT__ 1354 #define __FUNCT__ "MatNorm_MPIAIJ" 1355 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1356 { 1357 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1358 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1359 PetscErrorCode ierr; 1360 PetscInt i,j,cstart = mat->cmap.rstart; 1361 PetscReal sum = 0.0; 1362 PetscScalar *v; 1363 1364 PetscFunctionBegin; 1365 if (aij->size == 1) { 1366 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1367 } else { 1368 if (type == NORM_FROBENIUS) { 1369 v = amat->a; 1370 for (i=0; i<amat->nz; i++) { 1371 #if defined(PETSC_USE_COMPLEX) 1372 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1373 #else 1374 sum += (*v)*(*v); v++; 1375 #endif 1376 } 1377 v = bmat->a; 1378 for (i=0; i<bmat->nz; i++) { 1379 #if defined(PETSC_USE_COMPLEX) 1380 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1381 #else 1382 sum += (*v)*(*v); v++; 1383 #endif 1384 } 1385 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1386 *norm = sqrt(*norm); 1387 } else if (type == NORM_1) { /* max column norm */ 1388 PetscReal *tmp,*tmp2; 1389 PetscInt *jj,*garray = aij->garray; 1390 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1391 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1392 ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 1393 *norm = 0.0; 1394 v 
= amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        /* diagonal-block columns are local: shift by cstart to global */
        tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        /* off-diagonal columns map through garray to global numbering */
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->cmap.N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap.n; j++) {
        v = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
/*
   MatTranspose_MPIAIJ - Forms the transpose of an MPIAIJ matrix by
   inserting each local row of A (columns swapped to rows) into a new
   matrix B.  With matout == PETSC_NULL the transpose replaces A in place
   (square matrices only).

   Note: the diagonal block's column indices (Aloc->j) are temporarily
   shifted to global numbering in place and restored afterwards -- the
   statement order here matters.
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap.N,N = A->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
  Mat            B;
  PetscScalar    *array;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  }

  /* B has A's row/column sizes swapped */
  ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*)a->A->data;
  m = a->A->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = A->rmap.rstart;
  /* shift local column indices to global numbering (undone below) */
  for (i=0; i<ai[m]; i++) {aj[i] += A->cmap.rstart ;}
  for (i=0; i<m; i++) {
    /* insert row 'row' of A as column 'row' of B */
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for (i=0; i<ai[m]; i++) {aj[i] -= A->cmap.rstart ;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*)a->B->data;
  m = a->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = A->rmap.rstart;
  ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
  /* ct keeps the base pointer because 'cols' is advanced in the loop */
  ct = cols;
  for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  ierr = PetscFree(ct);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    /* in-place: replace A's contents with B's and destroy the shell */
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
/*
   MatDiagonalScale_MPIAIJ - Computes mat = diag(ll) * mat * diag(rr).
   The scatter of rr into the ghost vector is overlapped with the left
   scaling and the diagonal-block scaling.
*/
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* left-scale the off-diagonal block (rows are local) */
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatPrintHelp_MPIAIJ"
/* MatPrintHelp_MPIAIJ - Prints the SeqAIJ help text, on process 0 only. */
PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetBlockSize_MPIAIJ"
/* MatSetBlockSize_MPIAIJ - Forwards the block size to both local blocks. */
PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
  ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
/* MatSetUnfactored_MPIAIJ - Clears the factored state of the diagonal block. */
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
/*
   MatEqual_MPIAIJ - Tests A == B by comparing the local diagonal and
   off-diagonal blocks, then taking a logical AND over all processes.
*/
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat
a,b,c,d;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  /* compare diagonal blocks first; only test off-diagonal blocks if equal */
  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  /* all processes must agree for the matrices to be equal */
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIAIJ"
/*
   MatCopy_MPIAIJ - Copies A into B.  The fast path copies the local blocks
   directly; it is used only when the nonzero patterns are declared the same
   and both matrices share this copy implementation.
*/
PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
/*
   MatSetUpPreallocation_MPIAIJ - Default preallocation hook: installs the
   library-default per-row nonzero estimates.
*/
PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
/*
   MatAXPY_MPIAIJ - Computes Y = a*X + Y.

   SAME_NONZERO_PATTERN   - value arrays are conformal; use BLAS axpy on
                            both blocks directly.
   SUBSET_NONZERO_PATTERN - X's pattern is a subset of Y's; the diagonal
                            blocks use MatAXPY_SeqAIJ, the off-diagonal
                            blocks go through a cached x-to-y index map.
   otherwise              - fall back to the generic MatAXPY_Basic().
*/
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x = (Mat_SeqAIJ *)xx->A->data;
    y = (Mat_SeqAIJ *)yy->A->data;
    bnz = (PetscBLASInt)x->nz;
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    bnz = (PetscBLASInt)x->nz;
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
  } else if (str == SUBSET_NONZERO_PATTERN) {
    ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);

    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    /* drop a stale cached map built for a different X */
    if (y->xtoy && y->XtoY != xx->B) {
      ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
      ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
    }
    if (!y->xtoy) { /* get xtoy */
      ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
      y->XtoY = xx->B;
    }
    for (i=0; i<x->nz; i++)
y->a[y->xtoy[i]] += a*(x->a[i]);
  } else {
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatConjugate_MPIAIJ"
/*
   MatConjugate_MPIAIJ - Complex-conjugates all entries (both local blocks);
   a no-op in a real-scalar build.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPIAIJ"
/* MatRealPart_MPIAIJ - Replaces all entries with their real parts. */
PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIAIJ"
/* MatImaginaryPart_MPIAIJ - Replaces all entries with their imaginary parts. */
PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/*
   Function table for the MPIAIJ matrix type.  Entries are POSITIONAL --
   each slot corresponds to a fixed operation in struct _MatOps (the
   numbered comments mark every fifth slot); 0 means "not implemented".
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
/* 4*/ MatMultAdd_MPIAIJ,
       MatMultTranspose_MPIAIJ,
       MatMultTransposeAdd_MPIAIJ,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
/*15*/ MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
/*20*/ MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
/*25*/ MatZeroRows_MPIAIJ,
       0,
       0,
       0,
       0,
/*30*/ MatSetUpPreallocation_MPIAIJ,
       0,
       0,
       0,
       0,
/*35*/ MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
/*40*/ MatAXPY_MPIAIJ,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
/*45*/ MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       0,
/*50*/ MatSetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
/*55*/ MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       MatPermute_MPIAIJ,
       0,
/*60*/ MatGetSubMatrix_MPIAIJ,
       MatDestroy_MPIAIJ,
       MatView_MPIAIJ,
       0,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ 0,
       0,
       MatSetColoring_MPIAIJ,
#if defined(PETSC_HAVE_ADIC)
       MatSetValuesAdic_MPIAIJ,
#else
       0,
#endif
       MatSetValuesAdifor_MPIAIJ,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
/*84*/ MatLoad_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
/*90*/ MatMatMult_MPIAIJ_MPIAIJ,
       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
       MatMatMultNumeric_MPIAIJ_MPIAIJ,
       MatPtAP_Basic,
       MatPtAPSymbolic_MPIAIJ,
/*95*/ MatPtAPNumeric_MPIAIJ,
       0,
       0,
       0,
       0,
/*100*/0,
       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
       MatPtAPNumeric_MPIAIJ_MPIAIJ,
       MatConjugate_MPIAIJ,
       0,
/*105*/MatSetValuesRow_MPIAIJ,
       MatRealPart_MPIAIJ,
       MatImaginaryPart_MPIAIJ};

/* ----------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIAIJ"
/* MatStoreValues_MPIAIJ - Saves the current numerical values of both
   local blocks so they can later be restored with MatRetrieveValues(). */
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIAIJ"
/* MatRetrieveValues_MPIAIJ - Restores the values saved by MatStoreValues(). */
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#include "petscpc.h"
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
/*
   MatMPIAIJSetPreallocation_MPIAIJ - Validates the per-row nonzero
   estimates (d_nz/d_nnz for the diagonal block, o_nz/o_nnz for the
   off-diagonal block), sets up the row/column maps, and creates the two
   underlying sequential AIJ matrices.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  B->preallocated = PETSC_TRUE;
  /* library defaults: 5 nonzeros/row on-diagonal, 2 off-diagonal */
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);

  B->rmap.bs = B->cmap.bs = 1;
  ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
  if (d_nnz) {
    for (i=0; i<B->rmap.n; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap.n; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
    }
  }
  b = (Mat_MPIAIJ*)B->data;

  /* Explicitly create 2 MATSEQAIJ matrices.
*/ 1880 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1881 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 1882 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 1883 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 1884 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1885 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 1886 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 1887 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 1888 1889 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1890 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1891 1892 PetscFunctionReturn(0); 1893 } 1894 EXTERN_C_END 1895 1896 #undef __FUNCT__ 1897 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1898 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1899 { 1900 Mat mat; 1901 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1902 PetscErrorCode ierr; 1903 1904 PetscFunctionBegin; 1905 *newmat = 0; 1906 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 1907 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 1908 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1909 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1910 a = (Mat_MPIAIJ*)mat->data; 1911 1912 mat->factor = matin->factor; 1913 mat->rmap.bs = matin->rmap.bs; 1914 mat->assembled = PETSC_TRUE; 1915 mat->insertmode = NOT_SET_VALUES; 1916 mat->preallocated = PETSC_TRUE; 1917 1918 a->size = oldmat->size; 1919 a->rank = oldmat->rank; 1920 a->donotstash = oldmat->donotstash; 1921 a->roworiented = oldmat->roworiented; 1922 a->rowindices = 0; 1923 a->rowvalues = 0; 1924 a->getrowactive = PETSC_FALSE; 1925 1926 ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 1927 ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 1928 1929 ierr = 
MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1930 if (oldmat->colmap) { 1931 #if defined (PETSC_USE_CTABLE) 1932 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1933 #else 1934 ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1935 ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 1936 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 1937 #endif 1938 } else a->colmap = 0; 1939 if (oldmat->garray) { 1940 PetscInt len; 1941 len = oldmat->B->cmap.n; 1942 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1943 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 1944 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 1945 } else a->garray = 0; 1946 1947 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1948 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 1949 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1950 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 1951 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1952 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1953 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1954 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 1955 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1956 *newmat = mat; 1957 PetscFunctionReturn(0); 1958 } 1959 1960 #include "petscsys.h" 1961 1962 #undef __FUNCT__ 1963 #define __FUNCT__ "MatLoad_MPIAIJ" 1964 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat) 1965 { 1966 Mat A; 1967 PetscScalar *vals,*svals; 1968 MPI_Comm comm = ((PetscObject)viewer)->comm; 1969 MPI_Status status; 1970 PetscErrorCode ierr; 1971 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 1972 PetscInt i,nz,j,rstart,rend,mmax; 1973 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 1974 
PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1975 PetscInt cend,cstart,n,*rowners; 1976 int fd; 1977 1978 PetscFunctionBegin; 1979 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1980 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1981 if (!rank) { 1982 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1983 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1984 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1985 } 1986 1987 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1988 M = header[1]; N = header[2]; 1989 /* determine ownership of all rows */ 1990 m = M/size + ((M % size) > rank); 1991 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1992 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1993 1994 /* First process needs enough room for process with most rows */ 1995 if (!rank) { 1996 mmax = rowners[1]; 1997 for (i=2; i<size; i++) { 1998 mmax = PetscMax(mmax,rowners[i]); 1999 } 2000 } else mmax = m; 2001 2002 rowners[0] = 0; 2003 for (i=2; i<=size; i++) { 2004 mmax = PetscMax(mmax,rowners[i]); 2005 rowners[i] += rowners[i-1]; 2006 } 2007 rstart = rowners[rank]; 2008 rend = rowners[rank+1]; 2009 2010 /* distribute row lengths to all processors */ 2011 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2012 if (!rank) { 2013 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2014 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2015 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2016 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2017 for (j=0; j<m; j++) { 2018 procsnz[0] += ourlens[j]; 2019 } 2020 for (i=1; i<size; i++) { 2021 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2022 /* calculate the number of nonzeros on each processor */ 2023 for (j=0; j<rowners[i+1]-rowners[i]; 
j++) { 2024 procsnz[i] += rowlengths[j]; 2025 } 2026 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2027 } 2028 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2029 } else { 2030 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2031 } 2032 2033 if (!rank) { 2034 /* determine max buffer needed and allocate it */ 2035 maxnz = 0; 2036 for (i=0; i<size; i++) { 2037 maxnz = PetscMax(maxnz,procsnz[i]); 2038 } 2039 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2040 2041 /* read in my part of the matrix column indices */ 2042 nz = procsnz[0]; 2043 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2044 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2045 2046 /* read in every one elses and ship off */ 2047 for (i=1; i<size; i++) { 2048 nz = procsnz[i]; 2049 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2050 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2051 } 2052 ierr = PetscFree(cols);CHKERRQ(ierr); 2053 } else { 2054 /* determine buffer space needed for message */ 2055 nz = 0; 2056 for (i=0; i<m; i++) { 2057 nz += ourlens[i]; 2058 } 2059 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2060 2061 /* receive message of column indices*/ 2062 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2063 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2064 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2065 } 2066 2067 /* determine column ownership if matrix is not square */ 2068 if (N != M) { 2069 n = N/size + ((N % size) > rank); 2070 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2071 cstart = cend - n; 2072 } else { 2073 cstart = rstart; 2074 cend = rend; 2075 n = cend - cstart; 2076 } 2077 2078 /* loop over local rows, determining number of off diagonal entries */ 2079 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2080 jj = 0; 2081 
for (i=0; i<m; i++) { 2082 for (j=0; j<ourlens[i]; j++) { 2083 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2084 jj++; 2085 } 2086 } 2087 2088 /* create our matrix */ 2089 for (i=0; i<m; i++) { 2090 ourlens[i] -= offlens[i]; 2091 } 2092 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2093 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2094 ierr = MatSetType(A,type);CHKERRQ(ierr); 2095 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2096 2097 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2098 for (i=0; i<m; i++) { 2099 ourlens[i] += offlens[i]; 2100 } 2101 2102 if (!rank) { 2103 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2104 2105 /* read in my part of the matrix numerical values */ 2106 nz = procsnz[0]; 2107 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2108 2109 /* insert into matrix */ 2110 jj = rstart; 2111 smycols = mycols; 2112 svals = vals; 2113 for (i=0; i<m; i++) { 2114 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2115 smycols += ourlens[i]; 2116 svals += ourlens[i]; 2117 jj++; 2118 } 2119 2120 /* read in other processors and ship out */ 2121 for (i=1; i<size; i++) { 2122 nz = procsnz[i]; 2123 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2124 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2125 } 2126 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2127 } else { 2128 /* receive numeric values */ 2129 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2130 2131 /* receive message of values*/ 2132 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2133 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2134 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2135 2136 /* insert into matrix */ 2137 jj = rstart; 2138 smycols = mycols; 2139 svals = vals; 2140 for (i=0; i<m; i++) { 2141 ierr = 
MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2142 smycols += ourlens[i]; 2143 svals += ourlens[i]; 2144 jj++; 2145 } 2146 } 2147 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2148 ierr = PetscFree(vals);CHKERRQ(ierr); 2149 ierr = PetscFree(mycols);CHKERRQ(ierr); 2150 ierr = PetscFree(rowners);CHKERRQ(ierr); 2151 2152 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2153 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2154 *newmat = A; 2155 PetscFunctionReturn(0); 2156 } 2157 2158 #undef __FUNCT__ 2159 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2160 /* 2161 Not great since it makes two copies of the submatrix, first an SeqAIJ 2162 in local and then by concatenating the local matrices the end result. 2163 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2164 */ 2165 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2166 { 2167 PetscErrorCode ierr; 2168 PetscMPIInt rank,size; 2169 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2170 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2171 Mat *local,M,Mreuse; 2172 PetscScalar *vwork,*aa; 2173 MPI_Comm comm = mat->comm; 2174 Mat_SeqAIJ *aij; 2175 2176 2177 PetscFunctionBegin; 2178 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2179 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2180 2181 if (call == MAT_REUSE_MATRIX) { 2182 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2183 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2184 local = &Mreuse; 2185 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2186 } else { 2187 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2188 Mreuse = *local; 2189 ierr = PetscFree(local);CHKERRQ(ierr); 2190 } 2191 2192 /* 2193 m - number of local rows 2194 n - number of 
columns (same on all processors) 2195 rstart - first row in new global matrix generated 2196 */ 2197 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2198 if (call == MAT_INITIAL_MATRIX) { 2199 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2200 ii = aij->i; 2201 jj = aij->j; 2202 2203 /* 2204 Determine the number of non-zeros in the diagonal and off-diagonal 2205 portions of the matrix in order to do correct preallocation 2206 */ 2207 2208 /* first get start and end of "diagonal" columns */ 2209 if (csize == PETSC_DECIDE) { 2210 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2211 if (mglobal == n) { /* square matrix */ 2212 nlocal = m; 2213 } else { 2214 nlocal = n/size + ((n % size) > rank); 2215 } 2216 } else { 2217 nlocal = csize; 2218 } 2219 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2220 rstart = rend - nlocal; 2221 if (rank == size - 1 && rend != n) { 2222 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2223 } 2224 2225 /* next, compute all the lengths */ 2226 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2227 olens = dlens + m; 2228 for (i=0; i<m; i++) { 2229 jend = ii[i+1] - ii[i]; 2230 olen = 0; 2231 dlen = 0; 2232 for (j=0; j<jend; j++) { 2233 if (*jj < rstart || *jj >= rend) olen++; 2234 else dlen++; 2235 jj++; 2236 } 2237 olens[i] = olen; 2238 dlens[i] = dlen; 2239 } 2240 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2241 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 2242 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2243 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2244 ierr = PetscFree(dlens);CHKERRQ(ierr); 2245 } else { 2246 PetscInt ml,nl; 2247 2248 M = *newmat; 2249 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2250 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2251 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2252 /* 2253 The next two lines are needed so we may call 
MatSetValues_MPIAIJ() below directly, 2254 rather than the slower MatSetValues(). 2255 */ 2256 M->was_assembled = PETSC_TRUE; 2257 M->assembled = PETSC_FALSE; 2258 } 2259 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2260 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2261 ii = aij->i; 2262 jj = aij->j; 2263 aa = aij->a; 2264 for (i=0; i<m; i++) { 2265 row = rstart + i; 2266 nz = ii[i+1] - ii[i]; 2267 cwork = jj; jj += nz; 2268 vwork = aa; aa += nz; 2269 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2270 } 2271 2272 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2273 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2274 *newmat = M; 2275 2276 /* save submatrix used in processor for next request */ 2277 if (call == MAT_INITIAL_MATRIX) { 2278 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2279 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2280 } 2281 2282 PetscFunctionReturn(0); 2283 } 2284 2285 EXTERN_C_BEGIN 2286 #undef __FUNCT__ 2287 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2288 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2289 { 2290 PetscInt m,cstart, cend,j,nnz,i,d; 2291 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 2292 const PetscInt *JJ; 2293 PetscScalar *values; 2294 PetscErrorCode ierr; 2295 2296 PetscFunctionBegin; 2297 if (I[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"I[0] must be 0 it is %D",I[0]); 2298 2299 B->rmap.bs = B->cmap.bs = 1; 2300 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 2301 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 2302 m = B->rmap.n; 2303 cstart = B->cmap.rstart; 2304 cend = B->cmap.rend; 2305 rstart = B->rmap.rstart; 2306 2307 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2308 o_nnz = d_nnz + m; 2309 2310 for (i=0; i<m; i++) { 2311 nnz = I[i+1]- I[i]; 2312 JJ = J 
+ I[i]; 2313 nnz_max = PetscMax(nnz_max,nnz); 2314 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 2315 for (j=0; j<nnz; j++) { 2316 if (*JJ >= cstart) break; 2317 JJ++; 2318 } 2319 d = 0; 2320 for (; j<nnz; j++) { 2321 if (*JJ++ >= cend) break; 2322 d++; 2323 } 2324 d_nnz[i] = d; 2325 o_nnz[i] = nnz - d; 2326 } 2327 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2328 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2329 2330 if (v) values = (PetscScalar*)v; 2331 else { 2332 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2333 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2334 } 2335 2336 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2337 for (i=0; i<m; i++) { 2338 ii = i + rstart; 2339 nnz = I[i+1]- I[i]; 2340 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values+(v ? I[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2341 } 2342 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2343 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2344 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2345 2346 if (!v) { 2347 ierr = PetscFree(values);CHKERRQ(ierr); 2348 } 2349 PetscFunctionReturn(0); 2350 } 2351 EXTERN_C_END 2352 2353 #undef __FUNCT__ 2354 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2355 /*@ 2356 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2357 (the default parallel PETSc format). 2358 2359 Collective on MPI_Comm 2360 2361 Input Parameters: 2362 + B - the matrix 2363 . i - the indices into j for the start of each local row (starts with zero) 2364 . 
j - the column indices for each local row (starts with zero) these must be sorted for each row 2365 - v - optional values in the matrix 2366 2367 Level: developer 2368 2369 .keywords: matrix, aij, compressed row, sparse, parallel 2370 2371 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2372 @*/ 2373 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2374 { 2375 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2376 2377 PetscFunctionBegin; 2378 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2379 if (f) { 2380 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2381 } 2382 PetscFunctionReturn(0); 2383 } 2384 2385 #undef __FUNCT__ 2386 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2387 /*@C 2388 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2389 (the default parallel PETSc format). For good matrix assembly performance 2390 the user should preallocate the matrix storage by setting the parameters 2391 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2392 performance can be increased by more than a factor of 50. 2393 2394 Collective on MPI_Comm 2395 2396 Input Parameters: 2397 + A - the matrix 2398 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2399 (same value is used for all local rows) 2400 . d_nnz - array containing the number of nonzeros in the various rows of the 2401 DIAGONAL portion of the local submatrix (possibly different for each row) 2402 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2403 The size of this array is equal to the number of local rows, i.e 'm'. 2404 You must leave room for the diagonal entry even if it is zero. 2405 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2406 submatrix (same value is used for all local rows). 2407 - o_nnz - array containing the number of nonzeros in the various rows of the 2408 OFF-DIAGONAL portion of the local submatrix (possibly different for 2409 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2410 structure. The size of this array is equal to the number 2411 of local rows, i.e 'm'. 2412 2413 If the *_nnz parameter is given then the *_nz parameter is ignored 2414 2415 The AIJ format (also called the Yale sparse matrix format or 2416 compressed row storage (CSR)), is fully compatible with standard Fortran 77 2417 storage. The stored row and column indices begin with zero. See the users manual for details. 2418 2419 The parallel matrix is partitioned such that the first m0 rows belong to 2420 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2421 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2422 2423 The DIAGONAL portion of the local submatrix of a processor can be defined 2424 as the submatrix which is obtained by extraction the part corresponding 2425 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2426 first row that belongs to the processor, and r2 is the last row belonging 2427 to the this processor. This is a square mxm matrix. The remaining portion 2428 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2429 2430 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2431 2432 Example usage: 2433 2434 Consider the following 8x8 matrix with 34 non-zero values, that is 2435 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2436 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 2437 as follows: 2438 2439 .vb 2440 1 2 0 | 0 3 0 | 0 4 2441 Proc0 0 5 6 | 7 0 0 | 8 0 2442 9 0 10 | 11 0 0 | 12 0 2443 ------------------------------------- 2444 13 0 14 | 15 16 17 | 0 0 2445 Proc1 0 18 0 | 19 20 21 | 0 0 2446 0 0 0 | 22 23 0 | 24 0 2447 ------------------------------------- 2448 Proc2 25 26 27 | 0 0 28 | 29 0 2449 30 0 0 | 31 32 33 | 0 34 2450 .ve 2451 2452 This can be represented as a collection of submatrices as: 2453 2454 .vb 2455 A B C 2456 D E F 2457 G H I 2458 .ve 2459 2460 Where the submatrices A,B,C are owned by proc0, D,E,F are 2461 owned by proc1, G,H,I are owned by proc2. 2462 2463 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2464 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2465 The 'M','N' parameters are 8,8, and have the same values on all procs. 2466 2467 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2468 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2469 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2470 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2471 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2472 matrix, ans [DF] as another SeqAIJ matrix. 2473 2474 When d_nz, o_nz parameters are specified, d_nz storage elements are 2475 allocated for every row of the local diagonal submatrix, and o_nz 2476 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2477 One way to choose d_nz and o_nz is to use the max nonzerors per local 2478 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2479 In this case, the values of d_nz,o_nz are: 2480 .vb 2481 proc0 : dnz = 2, o_nz = 2 2482 proc1 : dnz = 3, o_nz = 2 2483 proc2 : dnz = 1, o_nz = 4 2484 .ve 2485 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2486 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2487 for proc3. 
i.e we are using 12+15+10=37 storage locations to store 2488 34 values. 2489 2490 When d_nnz, o_nnz parameters are specified, the storage is specified 2491 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2492 In the above case the values for d_nnz,o_nnz are: 2493 .vb 2494 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2495 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2496 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2497 .ve 2498 Here the space allocated is sum of all the above values i.e 34, and 2499 hence pre-allocation is perfect. 2500 2501 Level: intermediate 2502 2503 .keywords: matrix, aij, compressed row, sparse, parallel 2504 2505 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 2506 MPIAIJ 2507 @*/ 2508 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2509 { 2510 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2511 2512 PetscFunctionBegin; 2513 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2514 if (f) { 2515 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2516 } 2517 PetscFunctionReturn(0); 2518 } 2519 2520 #undef __FUNCT__ 2521 #define __FUNCT__ "MatCreateMPIAIJ" 2522 /*@C 2523 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2524 (the default parallel PETSc format). For good matrix assembly performance 2525 the user should preallocate the matrix storage by setting the parameters 2526 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2527 performance can be increased by more than a factor of 50. 2528 2529 Collective on MPI_Comm 2530 2531 Input Parameters: 2532 + comm - MPI communicator 2533 . 
m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2534 This value should be the same as the local size used in creating the 2535 y vector for the matrix-vector product y = Ax. 2536 . n - This value should be the same as the local size used in creating the 2537 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2538 calculated if N is given) For square matrices n is almost always m. 2539 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2540 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2541 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2542 (same value is used for all local rows) 2543 . d_nnz - array containing the number of nonzeros in the various rows of the 2544 DIAGONAL portion of the local submatrix (possibly different for each row) 2545 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2546 The size of this array is equal to the number of local rows, i.e 'm'. 2547 You must leave room for the diagonal entry even if it is zero. 2548 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2549 submatrix (same value is used for all local rows). 2550 - o_nnz - array containing the number of nonzeros in the various rows of the 2551 OFF-DIAGONAL portion of the local submatrix (possibly different for 2552 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2553 structure. The size of this array is equal to the number 2554 of local rows, i.e 'm'. 2555 2556 Output Parameter: 2557 . A - the matrix 2558 2559 Notes: 2560 If the *_nnz parameter is given then the *_nz parameter is ignored 2561 2562 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2563 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2564 storage requirements for this matrix. 
2565 2566 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2567 processor than it must be used on all processors that share the object for 2568 that argument. 2569 2570 The user MUST specify either the local or global matrix dimensions 2571 (possibly both). 2572 2573 The parallel matrix is partitioned such that the first m0 rows belong to 2574 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2575 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2576 2577 The DIAGONAL portion of the local submatrix of a processor can be defined 2578 as the submatrix which is obtained by extraction the part corresponding 2579 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2580 first row that belongs to the processor, and r2 is the last row belonging 2581 to the this processor. This is a square mxm matrix. The remaining portion 2582 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2583 2584 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2585 2586 When calling this routine with a single process communicator, a matrix of 2587 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2588 type of communicator, use the construction mechanism: 2589 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 2590 2591 By default, this format uses inodes (identical nodes) when possible. 2592 We search for consecutive rows with the same nonzero structure, thereby 2593 reusing matrix information to achieve increased efficiency. 2594 2595 Options Database Keys: 2596 + -mat_no_inode - Do not use inodes 2597 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 2598 - -mat_aij_oneindex - Internally use indexing starting at 1 2599 rather than 0. Note that when calling MatSetValues(), 2600 the user still MUST index entries starting at 0! 
2601 2602 2603 Example usage: 2604 2605 Consider the following 8x8 matrix with 34 non-zero values, that is 2606 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2607 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2608 as follows: 2609 2610 .vb 2611 1 2 0 | 0 3 0 | 0 4 2612 Proc0 0 5 6 | 7 0 0 | 8 0 2613 9 0 10 | 11 0 0 | 12 0 2614 ------------------------------------- 2615 13 0 14 | 15 16 17 | 0 0 2616 Proc1 0 18 0 | 19 20 21 | 0 0 2617 0 0 0 | 22 23 0 | 24 0 2618 ------------------------------------- 2619 Proc2 25 26 27 | 0 0 28 | 29 0 2620 30 0 0 | 31 32 33 | 0 34 2621 .ve 2622 2623 This can be represented as a collection of submatrices as: 2624 2625 .vb 2626 A B C 2627 D E F 2628 G H I 2629 .ve 2630 2631 Where the submatrices A,B,C are owned by proc0, D,E,F are 2632 owned by proc1, G,H,I are owned by proc2. 2633 2634 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2635 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2636 The 'M','N' parameters are 8,8, and have the same values on all procs. 2637 2638 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2639 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2640 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2641 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2642 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2643 matrix, ans [DF] as another SeqAIJ matrix. 2644 2645 When d_nz, o_nz parameters are specified, d_nz storage elements are 2646 allocated for every row of the local diagonal submatrix, and o_nz 2647 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2648 One way to choose d_nz and o_nz is to use the max nonzerors per local 2649 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    /* more than one process: create the parallel MPIAIJ format */
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    /* single process: a SEQAIJ matrix is returned (see Notes above) */
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT
MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
{
  /* Returns the diagonal (Ad) and off-diagonal (Ao) SeqAIJ blocks of the
     parallel matrix A, plus the mapping (colmap) from local columns of Ao
     to global column numbers.  These are references into A's internal
     data, not copies; the caller must not destroy or free them. */
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
/* Attaches a coloring to both the diagonal (a->A) and off-diagonal (a->B)
   SeqAIJ blocks of the parallel matrix A. */
PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_LOCAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion: gather the colors of all
       global columns, then select those referenced by B (via garray) */
    ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap.n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->cmap.n,coloring->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    PetscInt        *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion: map the locally owned global rows
       into the ghosted (local) numbering so coloring->colors can be indexed */
    ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap.n; i++) {
      larray[i] = i + A->cmap.rstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap.n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->cmap.n,coloring->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion: garray already holds the global
       column numbers referenced by B; map them to the ghosted numbering */
    ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap.n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->cmap.n,coloring->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
  }

  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ADIC)
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
/* Forwards ADIC-differentiated values to both local SeqAIJ blocks. */
PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
/* Forwards ADIFOR-differentiated values to both local SeqAIJ blocks. */
PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge"
/*@C
      MatMerge - Creates a single large PETSc matrix by concatenating sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    inmat - the input sequential matrices
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    outmat - the parallel matrix generated

    Level: advanced

   Notes: The number of columns of the matrix in EACH processor MUST be the same.
          The input matrix inmat is destroyed by this routine.

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,I,*dnz,*onz;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
    if (n == PETSC_DECIDE){
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* first global row owned by this process = sum of the m's of lower ranks */
    ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= m;

    ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
    for (i=0;i<m;i++) {
      ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
    }
    /* This routine will ONLY return MPIAIJ type matrix */
    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  } else if (scall == MAT_REUSE_MATRIX){
    ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  /* copy the local rows of inmat into the corresponding global rows of outmat */
  for (i=0;i<m;i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    I    = i + rstart;
    ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  /* the input matrix is consumed by this routine (see Notes above) */
  ierr = MatDestroy(inmat);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFileSplit"
/* Writes the locally owned rows of the parallel matrix A as a SeqAIJ
   matrix to the binary file <outfile>.<rank>, one file per process. */
PetscErrorCode MatFileSplit(Mat A,char *outfile)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscInt          m,N,i,rstart,nnz;
  size_t            len;
  const PetscInt    *indx;
  PetscViewer       out;
  char              *name;
  Mat               B;
  const PetscScalar *values;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
  ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
  /* Should this be the type of the diagonal block of A?
*/ 2892 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2893 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 2894 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2895 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2896 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2897 for (i=0;i<m;i++) { 2898 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2899 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2900 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2901 } 2902 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2903 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2904 2905 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2906 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2907 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2908 sprintf(name,"%s.%d",outfile,rank); 2909 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 2910 ierr = PetscFree(name); 2911 ierr = MatView(B,out);CHKERRQ(ierr); 2912 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2913 ierr = MatDestroy(B);CHKERRQ(ierr); 2914 PetscFunctionReturn(0); 2915 } 2916 2917 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 2918 #undef __FUNCT__ 2919 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 2920 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 2921 { 2922 PetscErrorCode ierr; 2923 Mat_Merge_SeqsToMPI *merge; 2924 PetscObjectContainer container; 2925 2926 PetscFunctionBegin; 2927 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2928 if (container) { 2929 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2930 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 2931 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 2932 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 2933 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 2934 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 2935 ierr = 
PetscFree(merge->buf_ri);CHKERRQ(ierr); 2936 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 2937 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 2938 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 2939 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 2940 2941 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 2942 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 2943 } 2944 ierr = PetscFree(merge);CHKERRQ(ierr); 2945 2946 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 2947 PetscFunctionReturn(0); 2948 } 2949 2950 #include "src/mat/utils/freespace.h" 2951 #include "petscbt.h" 2952 static PetscEvent logkey_seqstompinum = 0; 2953 #undef __FUNCT__ 2954 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 2955 /*@C 2956 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 2957 matrices from each processor 2958 2959 Collective on MPI_Comm 2960 2961 Input Parameters: 2962 + comm - the communicators the parallel matrix will live on 2963 . seqmat - the input sequential matrices 2964 . m - number of local rows (or PETSC_DECIDE) 2965 . n - number of local columns (or PETSC_DECIDE) 2966 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2967 2968 Output Parameter: 2969 . mpimat - the parallel matrix generated 2970 2971 Level: advanced 2972 2973 Notes: 2974 The dimensions of the sequential matrix in each processor MUST be the same. 2975 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 2976 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 
2977 @*/ 2978 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 2979 { 2980 PetscErrorCode ierr; 2981 MPI_Comm comm=mpimat->comm; 2982 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2983 PetscMPIInt size,rank,taga,*len_s; 2984 PetscInt N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j; 2985 PetscInt proc,m; 2986 PetscInt **buf_ri,**buf_rj; 2987 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 2988 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 2989 MPI_Request *s_waits,*r_waits; 2990 MPI_Status *status; 2991 MatScalar *aa=a->a,**abuf_r,*ba_i; 2992 Mat_Merge_SeqsToMPI *merge; 2993 PetscObjectContainer container; 2994 2995 PetscFunctionBegin; 2996 if (!logkey_seqstompinum) { 2997 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 2998 } 2999 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3000 3001 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3002 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3003 3004 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3005 if (container) { 3006 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3007 } 3008 bi = merge->bi; 3009 bj = merge->bj; 3010 buf_ri = merge->buf_ri; 3011 buf_rj = merge->buf_rj; 3012 3013 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3014 owners = merge->rowmap.range; 3015 len_s = merge->len_s; 3016 3017 /* send and recv matrix values */ 3018 /*-----------------------------*/ 3019 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3020 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3021 3022 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 3023 for (proc=0,k=0; proc<size; proc++){ 3024 if (!len_s[proc]) continue; 3025 i = owners[proc]; 3026 ierr = 
MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 3027 k++; 3028 } 3029 3030 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 3031 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 3032 ierr = PetscFree(status);CHKERRQ(ierr); 3033 3034 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3035 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3036 3037 /* insert mat values of mpimat */ 3038 /*----------------------------*/ 3039 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 3040 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3041 nextrow = buf_ri_k + merge->nrecv; 3042 nextai = nextrow + merge->nrecv; 3043 3044 for (k=0; k<merge->nrecv; k++){ 3045 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3046 nrows = *(buf_ri_k[k]); 3047 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 3048 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3049 } 3050 3051 /* set values of ba */ 3052 m = merge->rowmap.n; 3053 for (i=0; i<m; i++) { 3054 arow = owners[rank] + i; 3055 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 3056 bnzi = bi[i+1] - bi[i]; 3057 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 3058 3059 /* add local non-zero vals of this proc's seqmat into ba */ 3060 anzi = ai[arow+1] - ai[arow]; 3061 aj = a->j + ai[arow]; 3062 aa = a->a + ai[arow]; 3063 nextaj = 0; 3064 for (j=0; nextaj<anzi; j++){ 3065 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3066 ba_i[j] += aa[nextaj++]; 3067 } 3068 } 3069 3070 /* add received vals into ba */ 3071 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3072 /* i-th row */ 3073 if (i == *nextrow[k]) { 3074 anzi = *(nextai[k]+1) - *nextai[k]; 3075 aj = buf_rj[k] + *(nextai[k]); 3076 aa = abuf_r[k] + *(nextai[k]); 3077 nextaj = 0; 3078 for (j=0; nextaj<anzi; j++){ 3079 if 
(*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3080 ba_i[j] += aa[nextaj++]; 3081 } 3082 } 3083 nextrow[k]++; nextai[k]++; 3084 } 3085 } 3086 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3087 } 3088 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3089 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3090 3091 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3092 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3093 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3094 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3095 PetscFunctionReturn(0); 3096 } 3097 3098 static PetscEvent logkey_seqstompisym = 0; 3099 #undef __FUNCT__ 3100 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 3101 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3102 { 3103 PetscErrorCode ierr; 3104 Mat B_mpi; 3105 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3106 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3107 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3108 PetscInt M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j; 3109 PetscInt len,proc,*dnz,*onz; 3110 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3111 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3112 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3113 MPI_Status *status; 3114 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3115 PetscBT lnkbt; 3116 Mat_Merge_SeqsToMPI *merge; 3117 PetscObjectContainer container; 3118 3119 PetscFunctionBegin; 3120 if (!logkey_seqstompisym) { 3121 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3122 } 3123 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3124 3125 /* make sure it is a PETSc comm */ 3126 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 3127 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3128 ierr = 
MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3129 3130 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3131 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3132 3133 /* determine row ownership */ 3134 /*---------------------------------------------------------*/ 3135 merge->rowmap.n = m; 3136 merge->rowmap.N = M; 3137 merge->rowmap.bs = 1; 3138 ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 3139 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3140 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3141 3142 m = merge->rowmap.n; 3143 M = merge->rowmap.N; 3144 owners = merge->rowmap.range; 3145 3146 /* determine the number of messages to send, their lengths */ 3147 /*---------------------------------------------------------*/ 3148 len_s = merge->len_s; 3149 3150 len = 0; /* length of buf_si[] */ 3151 merge->nsend = 0; 3152 for (proc=0; proc<size; proc++){ 3153 len_si[proc] = 0; 3154 if (proc == rank){ 3155 len_s[proc] = 0; 3156 } else { 3157 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3158 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3159 } 3160 if (len_s[proc]) { 3161 merge->nsend++; 3162 nrows = 0; 3163 for (i=owners[proc]; i<owners[proc+1]; i++){ 3164 if (ai[i+1] > ai[i]) nrows++; 3165 } 3166 len_si[proc] = 2*(nrows+1); 3167 len += len_si[proc]; 3168 } 3169 } 3170 3171 /* determine the number and length of messages to receive for ij-structure */ 3172 /*-------------------------------------------------------------------------*/ 3173 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3174 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3175 3176 /* post the Irecv of j-structure */ 3177 /*-------------------------------*/ 3178 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&tagj);CHKERRQ(ierr); 3179 ierr = 
PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3180 3181 /* post the Isend of j-structure */ 3182 /*--------------------------------*/ 3183 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3184 sj_waits = si_waits + merge->nsend; 3185 3186 for (proc=0, k=0; proc<size; proc++){ 3187 if (!len_s[proc]) continue; 3188 i = owners[proc]; 3189 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3190 k++; 3191 } 3192 3193 /* receives and sends of j-structure are complete */ 3194 /*------------------------------------------------*/ 3195 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 3196 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 3197 3198 /* send and recv i-structure */ 3199 /*---------------------------*/ 3200 ierr = PetscObjectGetNewTag((PetscObject)merge,&tagi);CHKERRQ(ierr); 3201 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3202 3203 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3204 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3205 for (proc=0,k=0; proc<size; proc++){ 3206 if (!len_s[proc]) continue; 3207 /* form outgoing message for i-structure: 3208 buf_si[0]: nrows to be sent 3209 [1:nrows]: row index (global) 3210 [nrows+1:2*nrows+1]: i-structure index 3211 */ 3212 /*-------------------------------------------*/ 3213 nrows = len_si[proc]/2 - 1; 3214 buf_si_i = buf_si + nrows+1; 3215 buf_si[0] = nrows; 3216 buf_si_i[0] = 0; 3217 nrows = 0; 3218 for (i=owners[proc]; i<owners[proc+1]; i++){ 3219 anzi = ai[i+1] - ai[i]; 3220 if (anzi) { 3221 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3222 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3223 nrows++; 3224 } 3225 } 3226 ierr = 
MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3227 k++; 3228 buf_si += len_si[proc]; 3229 } 3230 3231 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 3232 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 3233 3234 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 3235 for (i=0; i<merge->nrecv; i++){ 3236 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 3237 } 3238 3239 ierr = PetscFree(len_si);CHKERRQ(ierr); 3240 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3241 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 3242 ierr = PetscFree(si_waits);CHKERRQ(ierr); 3243 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3244 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3245 ierr = PetscFree(status);CHKERRQ(ierr); 3246 3247 /* compute a local seq matrix in each processor */ 3248 /*----------------------------------------------*/ 3249 /* allocate bi array and free space for accumulating nonzero column info */ 3250 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3251 bi[0] = 0; 3252 3253 /* create and initialize a linked list */ 3254 nlnk = N+1; 3255 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3256 3257 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3258 len = 0; 3259 len = ai[owners[rank+1]] - ai[owners[rank]]; 3260 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3261 current_space = free_space; 3262 3263 /* determine symbolic info for each local row */ 3264 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3265 nextrow = buf_ri_k + merge->nrecv; 3266 nextai = nextrow + merge->nrecv; 3267 for (k=0; k<merge->nrecv; k++){ 3268 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3269 nrows = *buf_ri_k[k]; 3270 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved 
i-structure */ 3271 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3272 } 3273 3274 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3275 len = 0; 3276 for (i=0;i<m;i++) { 3277 bnzi = 0; 3278 /* add local non-zero cols of this proc's seqmat into lnk */ 3279 arow = owners[rank] + i; 3280 anzi = ai[arow+1] - ai[arow]; 3281 aj = a->j + ai[arow]; 3282 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3283 bnzi += nlnk; 3284 /* add received col data into lnk */ 3285 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3286 if (i == *nextrow[k]) { /* i-th row */ 3287 anzi = *(nextai[k]+1) - *nextai[k]; 3288 aj = buf_rj[k] + *nextai[k]; 3289 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3290 bnzi += nlnk; 3291 nextrow[k]++; nextai[k]++; 3292 } 3293 } 3294 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3295 3296 /* if free space is not available, make more free space */ 3297 if (current_space->local_remaining<bnzi) { 3298 ierr = PetscFreeSpaceGet(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3299 nspacedouble++; 3300 } 3301 /* copy data into free space, then initialize lnk */ 3302 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3303 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3304 3305 current_space->array += bnzi; 3306 current_space->local_used += bnzi; 3307 current_space->local_remaining -= bnzi; 3308 3309 bi[i+1] = bi[i] + bnzi; 3310 } 3311 3312 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3313 3314 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3315 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3316 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3317 3318 /* create symbolic parallel matrix B_mpi */ 3319 /*---------------------------------------*/ 3320 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 3321 if (n==PETSC_DECIDE) { 3322 ierr = 
MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 3323 } else { 3324 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3325 } 3326 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3327 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3328 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3329 3330 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3331 B_mpi->assembled = PETSC_FALSE; 3332 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3333 merge->bi = bi; 3334 merge->bj = bj; 3335 merge->buf_ri = buf_ri; 3336 merge->buf_rj = buf_rj; 3337 merge->coi = PETSC_NULL; 3338 merge->coj = PETSC_NULL; 3339 merge->owners_co = PETSC_NULL; 3340 3341 /* attach the supporting struct to B_mpi for reuse */ 3342 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3343 ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr); 3344 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3345 *mpimat = B_mpi; 3346 3347 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 3348 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3349 PetscFunctionReturn(0); 3350 } 3351 3352 static PetscEvent logkey_seqstompi = 0; 3353 #undef __FUNCT__ 3354 #define __FUNCT__ "MatMerge_SeqsToMPI" 3355 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3356 { 3357 PetscErrorCode ierr; 3358 3359 PetscFunctionBegin; 3360 if (!logkey_seqstompi) { 3361 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3362 } 3363 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3364 if (scall == MAT_INITIAL_MATRIX){ 3365 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3366 } 3367 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3368 ierr = 
PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
static PetscEvent logkey_getlocalmat = 0;
#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMat"
/*@C
     MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
  PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
  PetscScalar    *aa=a->a,*ba=b->a,*ca;
  PetscInt       am=A->rmap.n,i,j,k,cstart=A->cmap.rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;

  PetscFunctionBegin;
  if (!logkey_getlocalmat) {
    ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);
  }
  ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    /* each merged row holds the entries of both the diagonal (a) and the
       off-diagonal (b) block, interleaved in ascending global column order */
    ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++){
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
    k = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A: global columns left of the diagonal block */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A: columns right of the diagonal block */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat           = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->freedata = PETSC_TRUE;
    mat->nonew    = 0;
  } else if (scall == MAT_REUSE_MATRIX){
    /* the nonzero structure is unchanged: copy only the numerical values,
       walking the blocks in exactly the same order as the INITIAL branch */
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    ci  = mat->i; cj = mat->j; ca = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *ca++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *ca++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *ca++ = *ba++; bj++;
      }
    }
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscEvent logkey_getlocalmatcondensed = 0;
#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMatCondensed"
/*@C
     MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or PETSC_NULL)

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;

  PetscFunctionBegin;
  if (!logkey_getlocalmatcondensed) {
    ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);
  }
  ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row){
    /* default row set: all locally owned rows */
    start = A->rmap.rstart; end = A->rmap.rend;
    ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col){
    /* default column set: the diagonal-block columns plus the nonzero
       off-diagonal columns (garray), merged in ascending global order */
    start = A->cmap.rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap.n;
    nzB   = a->B->cmap.n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* off-diagonal columns left of the diagonal block */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;      /* diagonal-block columns */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];    /* remaining off-diagonal columns */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX){
    /* MatGetSubMatrices() expects an array of matrices when reusing */
    ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr = PetscFree(aloc);CHKERRQ(ierr);
  if (!row){
    ierr = ISDestroy(isrowa);CHKERRQ(ierr);
  }
  if (!col){
    ierr = ISDestroy(iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscEvent logkey_GetBrowsOfAcols = 0;
3543 #undef __FUNCT__ 3544 #define __FUNCT__ "MatGetBrowsOfAcols" 3545 /*@C 3546 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 3547 3548 Collective on Mat 3549 3550 Input Parameters: 3551 + A,B - the matrices in mpiaij format 3552 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3553 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 3554 3555 Output Parameter: 3556 + rowb, colb - index sets of rows and columns of B to extract 3557 . brstart - row index of B_seq from which next B->rmap.n rows are taken from B's local rows 3558 - B_seq - the sequential matrix generated 3559 3560 Level: developer 3561 3562 @*/ 3563 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 3564 { 3565 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3566 PetscErrorCode ierr; 3567 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 3568 IS isrowb,iscolb; 3569 Mat *bseq; 3570 3571 PetscFunctionBegin; 3572 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 3573 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 3574 } 3575 if (!logkey_GetBrowsOfAcols) { 3576 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 3577 } 3578 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3579 3580 if (scall == MAT_INITIAL_MATRIX){ 3581 start = A->cmap.rstart; 3582 cmap = a->garray; 3583 nzA = a->A->cmap.n; 3584 nzB = a->B->cmap.n; 3585 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3586 ncols = 0; 3587 for (i=0; i<nzB; i++) { /* row < local row index */ 3588 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3589 else break; 3590 } 3591 imark = i; 3592 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 3593 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* 
row > local row index */ 3594 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 3595 ierr = PetscFree(idx);CHKERRQ(ierr); 3596 *brstart = imark; 3597 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr); 3598 } else { 3599 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 3600 isrowb = *rowb; iscolb = *colb; 3601 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 3602 bseq[0] = *B_seq; 3603 } 3604 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 3605 *B_seq = bseq[0]; 3606 ierr = PetscFree(bseq);CHKERRQ(ierr); 3607 if (!rowb){ 3608 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 3609 } else { 3610 *rowb = isrowb; 3611 } 3612 if (!colb){ 3613 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 3614 } else { 3615 *colb = iscolb; 3616 } 3617 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3618 PetscFunctionReturn(0); 3619 } 3620 3621 static PetscEvent logkey_GetBrowsOfAocols = 0; 3622 #undef __FUNCT__ 3623 #define __FUNCT__ "MatGetBrowsOfAoCols" 3624 /*@C 3625 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 3626 of the OFF-DIAGONAL portion of local A 3627 3628 Collective on Mat 3629 3630 Input Parameters: 3631 + A,B - the matrices in mpiaij format 3632 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3633 . 
startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 3634 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 3635 3636 Output Parameter: 3637 + B_oth - the sequential matrix generated 3638 3639 Level: developer 3640 3641 @*/ 3642 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 3643 { 3644 VecScatter_MPI_General *gen_to,*gen_from; 3645 PetscErrorCode ierr; 3646 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3647 Mat_SeqAIJ *b_oth; 3648 VecScatter ctx=a->Mvctx; 3649 MPI_Comm comm=ctx->comm; 3650 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank; 3651 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj; 3652 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 3653 PetscInt i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 3654 MPI_Request *rwaits,*swaits; 3655 MPI_Status *sstatus,rstatus; 3656 PetscInt *cols; 3657 PetscScalar *vals; 3658 PetscMPIInt j; 3659 3660 PetscFunctionBegin; 3661 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 3662 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 3663 } 3664 if (!logkey_GetBrowsOfAocols) { 3665 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE); 3666 } 3667 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3668 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3669 3670 gen_to = (VecScatter_MPI_General*)ctx->todata; 3671 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 3672 rvalues = gen_from->values; /* holds the length of sending row */ 3673 svalues = gen_to->values; /* holds the length of receiving row */ 3674 nrecvs = gen_from->n; 3675 nsends = gen_to->n; 3676 rwaits = gen_from->requests; 3677 swaits = gen_to->requests; 3678 srow = gen_to->indices; /* 
local row index to be sent */ 3679 rstarts = gen_from->starts; 3680 sstarts = gen_to->starts; 3681 rprocs = gen_from->procs; 3682 sprocs = gen_to->procs; 3683 sstatus = gen_to->sstatus; 3684 3685 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 3686 if (scall == MAT_INITIAL_MATRIX){ 3687 /* i-array */ 3688 /*---------*/ 3689 /* post receives */ 3690 for (i=0; i<nrecvs; i++){ 3691 rowlen = (PetscInt*)rvalues + rstarts[i]; 3692 nrows = rstarts[i+1]-rstarts[i]; 3693 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3694 } 3695 3696 /* pack the outgoing message */ 3697 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 3698 rstartsj = sstartsj + nsends +1; 3699 sstartsj[0] = 0; rstartsj[0] = 0; 3700 len = 0; /* total length of j or a array to be sent */ 3701 k = 0; 3702 for (i=0; i<nsends; i++){ 3703 rowlen = (PetscInt*)svalues + sstarts[i]; 3704 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3705 for (j=0; j<nrows; j++) { 3706 row = srow[k] + B->rmap.range[rank]; /* global row idx */ 3707 ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 3708 len += rowlen[j]; 3709 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 3710 k++; 3711 } 3712 ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3713 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 3714 } 3715 /* recvs and sends of i-array are completed */ 3716 i = nrecvs; 3717 while (i--) { 3718 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3719 } 3720 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3721 /* allocate buffers for sending j and a arrays */ 3722 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 3723 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 3724 3725 /* create i-array of B_oth */ 3726 ierr = 
PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 3727 b_othi[0] = 0; 3728 len = 0; /* total length of j or a array to be received */ 3729 k = 0; 3730 for (i=0; i<nrecvs; i++){ 3731 rowlen = (PetscInt*)rvalues + rstarts[i]; 3732 nrows = rstarts[i+1]-rstarts[i]; 3733 for (j=0; j<nrows; j++) { 3734 b_othi[k+1] = b_othi[k] + rowlen[j]; 3735 len += rowlen[j]; k++; 3736 } 3737 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 3738 } 3739 3740 /* allocate space for j and a arrrays of B_oth */ 3741 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 3742 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr); 3743 3744 /* j-array */ 3745 /*---------*/ 3746 /* post receives of j-array */ 3747 for (i=0; i<nrecvs; i++){ 3748 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3749 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3750 } 3751 k = 0; 3752 for (i=0; i<nsends; i++){ 3753 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3754 bufJ = bufj+sstartsj[i]; 3755 for (j=0; j<nrows; j++) { 3756 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 3757 ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3758 for (l=0; l<ncols; l++){ 3759 *bufJ++ = cols[l]; 3760 } 3761 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3762 } 3763 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3764 } 3765 3766 /* recvs and sends of j-array are completed */ 3767 i = nrecvs; 3768 while (i--) { 3769 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3770 } 3771 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3772 } else if (scall == MAT_REUSE_MATRIX){ 3773 sstartsj = *startsj; 3774 rstartsj = sstartsj + nsends +1; 3775 bufa = *bufa_ptr; 3776 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 3777 b_otha = 
b_oth->a; 3778 } else { 3779 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 3780 } 3781 3782 /* a-array */ 3783 /*---------*/ 3784 /* post receives of a-array */ 3785 for (i=0; i<nrecvs; i++){ 3786 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3787 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3788 } 3789 k = 0; 3790 for (i=0; i<nsends; i++){ 3791 nrows = sstarts[i+1]-sstarts[i]; 3792 bufA = bufa+sstartsj[i]; 3793 for (j=0; j<nrows; j++) { 3794 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 3795 ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3796 for (l=0; l<ncols; l++){ 3797 *bufA++ = vals[l]; 3798 } 3799 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3800 3801 } 3802 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3803 } 3804 /* recvs and sends of a-array are completed */ 3805 i = nrecvs; 3806 while (i--) { 3807 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3808 } 3809 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3810 3811 if (scall == MAT_INITIAL_MATRIX){ 3812 /* put together the new matrix */ 3813 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 3814 3815 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3816 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 3817 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 3818 b_oth->freedata = PETSC_TRUE; 3819 b_oth->nonew = 0; 3820 3821 ierr = PetscFree(bufj);CHKERRQ(ierr); 3822 if (!startsj || !bufa_ptr){ 3823 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 3824 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 3825 } else { 3826 *startsj = sstartsj; 3827 *bufa_ptr = bufa; 3828 } 3829 } 3830 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3831 3832 PetscFunctionReturn(0); 3833 } 3834 3835 #undef __FUNCT__ 3836 #define __FUNCT__ "MatGetCommunicationStructs" 3837 /*@C 3838 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 3839 3840 Not Collective 3841 3842 Input Parameters: 3843 . A - The matrix in mpiaij format 3844 3845 Output Parameter: 3846 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 3847 . colmap - A map from global column index to local index into lvec 3848 - multScatter - A scatter from the argument of a matrix-vector product to lvec 3849 3850 Level: developer 3851 3852 @*/ 3853 #if defined (PETSC_USE_CTABLE) 3854 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 3855 #else 3856 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 3857 #endif 3858 { 3859 Mat_MPIAIJ *a; 3860 3861 PetscFunctionBegin; 3862 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 3863 PetscValidPointer(lvec, 2) 3864 PetscValidPointer(colmap, 3) 3865 PetscValidPointer(multScatter, 4) 3866 a = (Mat_MPIAIJ *) A->data; 3867 if (lvec) *lvec = a->lvec; 3868 if (colmap) *colmap = a->colmap; 3869 if (multScatter) *multScatter = a->Mvctx; 3870 PetscFunctionReturn(0); 3871 } 3872 3873 /*MC 3874 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 3875 3876 Options Database Keys: 3877 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 3878 3879 Level: beginner 3880 3881 .seealso: MatCreateMPIAIJ 3882 M*/ 3883 3884 EXTERN_C_BEGIN 3885 #undef __FUNCT__ 3886 #define __FUNCT__ "MatCreate_MPIAIJ" 3887 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 3888 { 3889 Mat_MPIAIJ *b; 3890 PetscErrorCode ierr; 3891 PetscMPIInt size; 3892 3893 PetscFunctionBegin; 3894 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 3895 3896 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 3897 B->data = (void*)b; 3898 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3899 B->factor = 0; 3900 B->rmap.bs = 1; 3901 B->assembled = PETSC_FALSE; 3902 B->mapping = 0; 3903 3904 B->insertmode = NOT_SET_VALUES; 3905 b->size = size; 3906 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 3907 3908 /* build cache for off array entries formed */ 3909 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 3910 b->donotstash = PETSC_FALSE; 3911 b->colmap = 0; 3912 b->garray = 0; 3913 b->roworiented = PETSC_TRUE; 3914 3915 /* stuff used for matrix vector multiply */ 3916 b->lvec = PETSC_NULL; 3917 b->Mvctx = PETSC_NULL; 3918 3919 /* stuff for MatGetRow() */ 3920 b->rowindices = 0; 3921 b->rowvalues = 0; 3922 b->getrowactive = PETSC_FALSE; 3923 3924 3925 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3926 "MatStoreValues_MPIAIJ", 3927 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 3928 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3929 "MatRetrieveValues_MPIAIJ", 3930 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 3931 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3932 "MatGetDiagonalBlock_MPIAIJ", 3933 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 3934 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3935 "MatIsTranspose_MPIAIJ", 3936 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 3937 ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 3938 "MatMPIAIJSetPreallocation_MPIAIJ", 3939 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 3940 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 3941 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 3942 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 3943 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3944 "MatDiagonalScaleLocal_MPIAIJ", 3945 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 3946 PetscFunctionReturn(0); 3947 } 3948 EXTERN_C_END 3949 3950 /* 3951 Special version for direct calls from Fortran 3952 */ 3953 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3954 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 3955 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3956 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 3957 #endif 3958 3959 /* Change these macros so can be used in void function */ 3960 #undef CHKERRQ 3961 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr) 3962 #undef SETERRQ2 3963 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr) 3964 #undef SETERRQ 3965 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr) 3966 3967 EXTERN_C_BEGIN 3968 #undef __FUNCT__ 3969 #define __FUNCT__ "matsetvaluesmpiaij_" 3970 void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv) 3971 { 3972 Mat mat = *mmat; 3973 PetscInt m = *mm, n = *mn; 3974 InsertMode addv = *maddv; 3975 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3976 PetscScalar value; 3977 PetscErrorCode ierr; 3978 3979 MatPreallocated(mat); 3980 if (mat->insertmode == NOT_SET_VALUES) { 3981 mat->insertmode = addv; 3982 } 3983 #if defined(PETSC_USE_DEBUG) 3984 else if (mat->insertmode != addv) { 3985 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3986 } 3987 #endif 3988 { 3989 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 3990 PetscInt cstart = 
mat->cmap.rstart,cend = mat->cmap.rend,row,col; 3991 PetscTruth roworiented = aij->roworiented; 3992 3993 /* Some Variables required in the macro */ 3994 Mat A = aij->A; 3995 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3996 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 3997 PetscScalar *aa = a->a; 3998 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 3999 Mat B = aij->B; 4000 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 4001 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n; 4002 PetscScalar *ba = b->a; 4003 4004 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 4005 PetscInt nonew = a->nonew; 4006 PetscScalar *ap1,*ap2; 4007 4008 PetscFunctionBegin; 4009 for (i=0; i<m; i++) { 4010 if (im[i] < 0) continue; 4011 #if defined(PETSC_USE_DEBUG) 4012 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 4013 #endif 4014 if (im[i] >= rstart && im[i] < rend) { 4015 row = im[i] - rstart; 4016 lastcol1 = -1; 4017 rp1 = aj + ai[row]; 4018 ap1 = aa + ai[row]; 4019 rmax1 = aimax[row]; 4020 nrow1 = ailen[row]; 4021 low1 = 0; 4022 high1 = nrow1; 4023 lastcol2 = -1; 4024 rp2 = bj + bi[row]; 4025 ap2 = ba + bi[row]; 4026 rmax2 = bimax[row]; 4027 nrow2 = bilen[row]; 4028 low2 = 0; 4029 high2 = nrow2; 4030 4031 for (j=0; j<n; j++) { 4032 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 4033 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 4034 if (in[j] >= cstart && in[j] < cend){ 4035 col = in[j] - cstart; 4036 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 4037 } else if (in[j] < 0) continue; 4038 #if defined(PETSC_USE_DEBUG) 4039 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);} 4040 #endif 4041 else { 4042 if (mat->was_assembled) { 4043 if (!aij->colmap) { 
4044 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 4045 } 4046 #if defined (PETSC_USE_CTABLE) 4047 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4048 col--; 4049 #else 4050 col = aij->colmap[in[j]] - 1; 4051 #endif 4052 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 4053 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 4054 col = in[j]; 4055 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 4056 B = aij->B; 4057 b = (Mat_SeqAIJ*)B->data; 4058 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 4059 rp2 = bj + bi[row]; 4060 ap2 = ba + bi[row]; 4061 rmax2 = bimax[row]; 4062 nrow2 = bilen[row]; 4063 low2 = 0; 4064 high2 = nrow2; 4065 bm = aij->B->rmap.n; 4066 ba = b->a; 4067 } 4068 } else col = in[j]; 4069 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 4070 } 4071 } 4072 } else { 4073 if (!aij->donotstash) { 4074 if (roworiented) { 4075 if (ignorezeroentries && v[i*n] == 0.0) continue; 4076 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 4077 } else { 4078 if (ignorezeroentries && v[i] == 0.0) continue; 4079 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 4080 } 4081 } 4082 } 4083 }} 4084 PetscFunctionReturnVoid(); 4085 } 4086 EXTERN_C_END 4087