1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 4 #include "src/inline/spops.h" 5 6 /* 7 Local utility routine that creates a mapping from the global column 8 number to the local number in the off-diagonal part of the local 9 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 10 a slightly higher hash table cost; without it it is not scalable (each processor 11 has an order N integer array but is fast to acess. 12 */ 13 #undef __FUNCT__ 14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 16 { 17 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 18 PetscErrorCode ierr; 19 PetscInt n = aij->B->cmap.n,i; 20 21 PetscFunctionBegin; 22 #if defined (PETSC_USE_CTABLE) 23 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 24 for (i=0; i<n; i++){ 25 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 26 } 27 #else 28 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 29 ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr); 30 ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr); 31 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 32 #endif 33 PetscFunctionReturn(0); 34 } 35 36 37 #define CHUNKSIZE 15 38 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 39 { \ 40 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 41 lastcol1 = col;\ 42 while (high1-low1 > 5) { \ 43 t = (low1+high1)/2; \ 44 if (rp1[t] > col) high1 = t; \ 45 else low1 = t; \ 46 } \ 47 for (_i=low1; _i<high1; _i++) { \ 48 if (rp1[_i] > col) break; \ 49 if (rp1[_i] == col) { \ 50 if (addv == ADD_VALUES) ap1[_i] += value; \ 51 else ap1[_i] = value; \ 52 goto a_noinsert; \ 53 } \ 54 } \ 55 if (value == 0.0 && ignorezeroentries) goto a_noinsert; \ 56 if (nonew == 1) goto a_noinsert; \ 57 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 58 MatSeqXAIJReallocateAIJ(a,1,nrow1,row,col,rmax1,aa,ai,aj,am,rp1,ap1,aimax,nonew); \ 59 N = nrow1++ - 1; a->nz++; high1++; \ 60 /* shift up all the later entries in this row */ \ 61 for (ii=N; ii>=_i; ii--) { \ 62 rp1[ii+1] = rp1[ii]; \ 63 ap1[ii+1] = ap1[ii]; \ 64 } \ 65 rp1[_i] = col; \ 66 ap1[_i] = value; \ 67 a_noinsert: ; \ 68 ailen[row] = nrow1; \ 69 } 70 71 72 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 73 { \ 74 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 75 lastcol2 = col;\ 76 while (high2-low2 > 5) { \ 77 t = (low2+high2)/2; \ 78 if (rp2[t] > col) high2 = t; \ 79 else low2 = t; \ 80 } \ 81 for (_i=low2; _i<high2; _i++) { \ 82 if (rp2[_i] > col) break; \ 83 if (rp2[_i] == col) { \ 84 if (addv == ADD_VALUES) ap2[_i] += value; \ 85 else ap2[_i] = value; \ 86 goto b_noinsert; \ 87 } \ 88 } \ 89 if (value == 0.0 && ignorezeroentries) goto b_noinsert; \ 90 if (nonew == 1) goto b_noinsert; \ 91 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 92 MatSeqXAIJReallocateAIJ(b,1,nrow2,row,col,rmax2,ba,bi,bj,bm,rp2,ap2,bimax,nonew); \ 93 N = nrow2++ - 1; b->nz++; high2++;\ 94 /* shift up all the later entries in this row */ \ 95 for (ii=N; ii>=_i; ii--) { \ 96 rp2[ii+1] = rp2[ii]; \ 97 ap2[ii+1] = ap2[ii]; \ 98 } \ 99 rp2[_i] = col; \ 100 ap2[_i] = value; \ 101 b_noinsert: ; \ 102 bilen[row] = nrow2; \ 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 107 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt 
row,const PetscScalar v[]) 108 { 109 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 110 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 111 PetscErrorCode ierr; 112 PetscInt l,*garray = mat->garray,diag; 113 114 PetscFunctionBegin; 115 /* code only works for square matrices A */ 116 117 /* find size of row to the left of the diagonal part */ 118 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 119 row = row - diag; 120 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 121 if (garray[b->j[b->i[row]+l]] > diag) break; 122 } 123 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 124 125 /* diagonal part */ 126 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 127 128 /* right of diagonal part */ 129 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 130 PetscFunctionReturn(0); 131 } 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "MatSetValues_MPIAIJ" 135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 136 { 137 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 138 PetscScalar value; 139 PetscErrorCode ierr; 140 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 141 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 142 PetscTruth roworiented = aij->roworiented; 143 144 /* Some Variables required in the macro */ 145 Mat A = aij->A; 146 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 147 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 148 PetscScalar *aa = a->a; 149 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 150 Mat B = aij->B; 151 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 152 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n; 153 PetscScalar *ba = b->a; 154 155 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 156 PetscInt nonew = a->nonew; 157 PetscScalar *ap1,*ap2; 158 159 PetscFunctionBegin; 160 for (i=0; i<m; i++) { 161 if (im[i] < 0) continue; 162 #if defined(PETSC_USE_DEBUG) 163 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 164 #endif 165 if (im[i] >= rstart && im[i] < rend) { 166 row = im[i] - rstart; 167 lastcol1 = -1; 168 rp1 = aj + ai[row]; 169 ap1 = aa + ai[row]; 170 rmax1 = aimax[row]; 171 nrow1 = ailen[row]; 172 low1 = 0; 173 high1 = nrow1; 174 lastcol2 = -1; 175 rp2 = bj + bi[row]; 176 ap2 = ba + bi[row]; 177 rmax2 = bimax[row]; 178 nrow2 = bilen[row]; 179 low2 = 0; 180 high2 = nrow2; 181 182 for (j=0; j<n; j++) { 183 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 184 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 185 if (in[j] >= cstart && in[j] < cend){ 186 col = in[j] - cstart; 187 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 188 } else if (in[j] < 0) continue; 189 #if defined(PETSC_USE_DEBUG) 190 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);} 191 #endif 192 else { 193 if (mat->was_assembled) { 194 if (!aij->colmap) { 195 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 196 } 197 #if defined (PETSC_USE_CTABLE) 198 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 199 col--; 200 #else 201 col = aij->colmap[in[j]] - 1; 202 #endif 203 if (col < 0 && 
!((Mat_SeqAIJ*)(aij->A->data))->nonew) { 204 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 205 col = in[j]; 206 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 207 B = aij->B; 208 b = (Mat_SeqAIJ*)B->data; 209 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 210 rp2 = bj + bi[row]; 211 ap2 = ba + bi[row]; 212 rmax2 = bimax[row]; 213 nrow2 = bilen[row]; 214 low2 = 0; 215 high2 = nrow2; 216 bm = aij->B->rmap.n; 217 ba = b->a; 218 } 219 } else col = in[j]; 220 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 221 } 222 } 223 } else { 224 if (!aij->donotstash) { 225 if (roworiented) { 226 if (ignorezeroentries && v[i*n] == 0.0) continue; 227 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 228 } else { 229 if (ignorezeroentries && v[i] == 0.0) continue; 230 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 231 } 232 } 233 } 234 } 235 PetscFunctionReturn(0); 236 } 237 238 239 #undef __FUNCT__ 240 #define __FUNCT__ "MatGetValues_MPIAIJ" 241 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 242 { 243 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 244 PetscErrorCode ierr; 245 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 246 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 247 248 PetscFunctionBegin; 249 for (i=0; i<m; i++) { 250 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 251 if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1); 252 if (idxm[i] >= rstart && idxm[i] < rend) { 253 row = idxm[i] - rstart; 254 for (j=0; j<n; j++) { 255 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 256 if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1); 257 if (idxn[j] >= cstart && idxn[j] < cend){ 258 col = idxn[j] - cstart; 259 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 260 } else { 261 if (!aij->colmap) { 262 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 263 } 264 #if defined (PETSC_USE_CTABLE) 265 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 266 col --; 267 #else 268 col = aij->colmap[idxn[j]] - 1; 269 #endif 270 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 271 else { 272 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 273 } 274 } 275 } 276 } else { 277 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 278 } 279 } 280 PetscFunctionReturn(0); 281 } 282 283 #undef __FUNCT__ 284 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 285 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 286 { 287 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 288 PetscErrorCode ierr; 289 PetscInt nstash,reallocs; 290 InsertMode addv; 291 292 PetscFunctionBegin; 293 if (aij->donotstash) { 294 PetscFunctionReturn(0); 295 } 296 297 /* make sure all processors are either in INSERTMODE or ADDMODE */ 298 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 299 if (addv == (ADD_VALUES|INSERT_VALUES)) { 300 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 301 } 302 mat->insertmode = addv; /* in case this processor had no cache */ 303 304 ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr); 305 ierr = 
MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 306 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 307 PetscFunctionReturn(0); 308 } 309 310 #undef __FUNCT__ 311 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 312 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 313 { 314 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 315 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 316 PetscErrorCode ierr; 317 PetscMPIInt n; 318 PetscInt i,j,rstart,ncols,flg; 319 PetscInt *row,*col,other_disassembled; 320 PetscScalar *val; 321 InsertMode addv = mat->insertmode; 322 323 PetscFunctionBegin; 324 if (!aij->donotstash) { 325 while (1) { 326 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 327 if (!flg) break; 328 329 for (i=0; i<n;) { 330 /* Now identify the consecutive vals belonging to the same row */ 331 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 332 if (j < n) ncols = j-i; 333 else ncols = n-i; 334 /* Now assemble all these values with a single function call */ 335 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 336 i = j; 337 } 338 } 339 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 340 } 341 a->compressedrow.use = PETSC_FALSE; 342 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 343 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 344 345 /* determine if any processor has disassembled, if so we must 346 also disassemble ourselfs, in order that we may reassemble. */ 347 /* 348 if nonzero structure of submatrix B cannot change then we know that 349 no processor disassembled thus we can skip this stuff 350 */ 351 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 352 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 353 if (mat->was_assembled && !other_disassembled) { 354 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 355 } 356 } 357 /* reaccess the b because aij->B was changed in MatSetValues() or DisAssemble() */ 358 b = (Mat_SeqAIJ *)aij->B->data; 359 360 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 361 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 362 } 363 ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr); 364 b->compressedrow.use = PETSC_TRUE; 365 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 366 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 367 368 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 369 aij->rowvalues = 0; 370 371 /* used by MatAXPY() */ 372 a->xtoy = 0; b->xtoy = 0; 373 a->XtoY = 0; b->XtoY = 0; 374 375 PetscFunctionReturn(0); 376 } 377 378 #undef __FUNCT__ 379 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 380 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 381 { 382 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 383 PetscErrorCode ierr; 384 385 PetscFunctionBegin; 386 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 387 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 388 PetscFunctionReturn(0); 389 } 390 391 #undef __FUNCT__ 392 #define __FUNCT__ "MatZeroRows_MPIAIJ" 393 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 394 { 395 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 396 PetscErrorCode ierr; 397 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1; 398 PetscInt i,*owners = A->rmap.range; 399 PetscInt *nprocs,j,idx,nsends,row; 400 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 401 PetscInt *rvalues,count,base,slen,*source; 402 PetscInt 
*lens,*lrows,*values,rstart=A->rmap.rstart; 403 MPI_Comm comm = A->comm; 404 MPI_Request *send_waits,*recv_waits; 405 MPI_Status recv_status,*send_status; 406 #if defined(PETSC_DEBUG) 407 PetscTruth found = PETSC_FALSE; 408 #endif 409 410 PetscFunctionBegin; 411 /* first count number of contributors to each processor */ 412 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 413 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 414 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 415 j = 0; 416 for (i=0; i<N; i++) { 417 if (lastidx > (idx = rows[i])) j = 0; 418 lastidx = idx; 419 for (; j<size; j++) { 420 if (idx >= owners[j] && idx < owners[j+1]) { 421 nprocs[2*j]++; 422 nprocs[2*j+1] = 1; 423 owner[i] = j; 424 #if defined(PETSC_DEBUG) 425 found = PETSC_TRUE; 426 #endif 427 break; 428 } 429 } 430 #if defined(PETSC_DEBUG) 431 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 432 found = PETSC_FALSE; 433 #endif 434 } 435 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 436 437 /* inform other processors of number of messages and max length*/ 438 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 439 440 /* post receives: */ 441 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 442 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 443 for (i=0; i<nrecvs; i++) { 444 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 445 } 446 447 /* do sends: 448 1) starts[i] gives the starting index in svalues for stuff going to 449 the ith processor 450 */ 451 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 452 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 453 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 454 starts[0] = 0; 455 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 456 for (i=0; i<N; i++) { 457 svalues[starts[owner[i]]++] = rows[i]; 458 } 459 460 starts[0] = 0; 461 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 462 count = 0; 463 for (i=0; i<size; i++) { 464 if (nprocs[2*i+1]) { 465 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 466 } 467 } 468 ierr = PetscFree(starts);CHKERRQ(ierr); 469 470 base = owners[rank]; 471 472 /* wait on receives */ 473 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 474 source = lens + nrecvs; 475 count = nrecvs; slen = 0; 476 while (count) { 477 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 478 /* unpack receives into our local space */ 479 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 480 source[imdex] = recv_status.MPI_SOURCE; 481 lens[imdex] = n; 482 slen += n; 483 count--; 484 } 485 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 486 487 /* move the data into the send scatter */ 488 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 489 count = 0; 490 for (i=0; i<nrecvs; i++) { 491 values = rvalues + i*nmax; 492 for (j=0; j<lens[i]; j++) { 493 lrows[count++] = values[j] - base; 494 } 495 } 496 ierr = PetscFree(rvalues);CHKERRQ(ierr); 497 ierr = PetscFree(lens);CHKERRQ(ierr); 498 ierr = PetscFree(owner);CHKERRQ(ierr); 499 ierr = PetscFree(nprocs);CHKERRQ(ierr); 500 501 /* actually zap the local rows */ 502 /* 503 Zero the required rows. 
If the "diagonal block" of the matrix 504 is square and the user wishes to set the diagonal we use separate 505 code so that MatSetValues() is not called for each diagonal allocating 506 new memory, thus calling lots of mallocs and slowing things down. 507 508 Contributed by: Matthew Knepley 509 */ 510 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 511 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 512 if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) { 513 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 514 } else if (diag != 0.0) { 515 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 516 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 517 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 518 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 519 } 520 for (i = 0; i < slen; i++) { 521 row = lrows[i] + rstart; 522 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 523 } 524 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 525 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 526 } else { 527 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 528 } 529 ierr = PetscFree(lrows);CHKERRQ(ierr); 530 531 /* wait on sends */ 532 if (nsends) { 533 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 534 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 535 ierr = PetscFree(send_status);CHKERRQ(ierr); 536 } 537 ierr = PetscFree(send_waits);CHKERRQ(ierr); 538 ierr = PetscFree(svalues);CHKERRQ(ierr); 539 540 PetscFunctionReturn(0); 541 } 542 543 #undef __FUNCT__ 544 #define __FUNCT__ "MatMult_MPIAIJ" 545 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 546 { 547 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 548 PetscErrorCode ierr; 549 PetscInt nt; 550 551 PetscFunctionBegin; 552 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 553 if (nt != A->cmap.n) { 554 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt); 555 } 556 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 557 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 558 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 559 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 560 PetscFunctionReturn(0); 561 } 562 563 #undef __FUNCT__ 564 #define __FUNCT__ "MatMultAdd_MPIAIJ" 565 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 566 { 567 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 568 PetscErrorCode ierr; 569 570 PetscFunctionBegin; 571 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 572 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 573 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 574 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 575 PetscFunctionReturn(0); 576 } 577 578 #undef __FUNCT__ 579 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 580 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 581 { 582 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 583 PetscErrorCode ierr; 584 PetscTruth merged; 585 586 PetscFunctionBegin; 587 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 588 /* do nondiagonal part */ 589 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 590 if (!merged) { 591 /* send it on its way */ 592 ierr = 
VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 593 /* do local part */ 594 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 595 /* receive remote parts: note this assumes the values are not actually */ 596 /* added in yy until the next line, */ 597 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 598 } else { 599 /* do local part */ 600 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 601 /* send it on its way */ 602 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 603 /* values actually were received in the Begin() but we need to call this nop */ 604 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 605 } 606 PetscFunctionReturn(0); 607 } 608 609 EXTERN_C_BEGIN 610 #undef __FUNCT__ 611 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 612 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 613 { 614 MPI_Comm comm; 615 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 616 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 617 IS Me,Notme; 618 PetscErrorCode ierr; 619 PetscInt M,N,first,last,*notme,i; 620 PetscMPIInt size; 621 622 PetscFunctionBegin; 623 624 /* Easy test: symmetric diagonal block */ 625 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 626 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 627 if (!*f) PetscFunctionReturn(0); 628 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 629 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 630 if (size == 1) PetscFunctionReturn(0); 631 632 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 633 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 634 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 635 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 636 for (i=0; i<first; i++) notme[i] = i; 637 for (i=last; i<M; i++) notme[i-last+first] = i; 638 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 639 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 640 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 641 Aoff = Aoffs[0]; 642 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 643 Boff = Boffs[0]; 644 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 645 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 646 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 647 ierr = ISDestroy(Me);CHKERRQ(ierr); 648 ierr = ISDestroy(Notme);CHKERRQ(ierr); 649 650 PetscFunctionReturn(0); 651 } 652 EXTERN_C_END 653 654 #undef __FUNCT__ 655 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 656 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 657 { 658 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 659 PetscErrorCode ierr; 660 661 PetscFunctionBegin; 662 /* do nondiagonal part */ 663 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 664 /* send it on its way */ 665 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 666 /* do local part */ 667 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 668 /* receive remote parts */ 669 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 670 PetscFunctionReturn(0); 671 } 672 673 /* 674 This only works correctly for square matrices where the subblock A->A is the 675 diagonal block 676 */ 677 #undef __FUNCT__ 678 #define 
__FUNCT__ "MatGetDiagonal_MPIAIJ" 679 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 680 { 681 PetscErrorCode ierr; 682 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 683 684 PetscFunctionBegin; 685 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 686 if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) { 687 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 688 } 689 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 690 PetscFunctionReturn(0); 691 } 692 693 #undef __FUNCT__ 694 #define __FUNCT__ "MatScale_MPIAIJ" 695 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 696 { 697 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 698 PetscErrorCode ierr; 699 700 PetscFunctionBegin; 701 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 702 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 703 PetscFunctionReturn(0); 704 } 705 706 #undef __FUNCT__ 707 #define __FUNCT__ "MatDestroy_MPIAIJ" 708 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 709 { 710 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 711 PetscErrorCode ierr; 712 713 PetscFunctionBegin; 714 #if defined(PETSC_USE_LOG) 715 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N); 716 #endif 717 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 718 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 719 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 720 #if defined (PETSC_USE_CTABLE) 721 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 722 #else 723 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 724 #endif 725 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 726 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 727 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 728 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 729 ierr = PetscFree(aij);CHKERRQ(ierr); 730 731 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 732 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 733 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 734 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 735 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 736 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 737 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 738 PetscFunctionReturn(0); 739 } 740 741 #undef __FUNCT__ 742 #define __FUNCT__ "MatView_MPIAIJ_Binary" 743 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 744 { 745 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 746 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 747 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 748 PetscErrorCode ierr; 749 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 750 int fd; 751 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 752 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz; 753 PetscScalar *column_values; 754 755 PetscFunctionBegin; 756 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 757 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 758 nz = A->nz + B->nz; 759 if (!rank) { 760 header[0] = MAT_FILE_COOKIE; 761 header[1] = mat->rmap.N; 762 header[2] = mat->cmap.N; 763 ierr = 
MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 764 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 765 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 766 /* get largest number of rows any processor has */ 767 rlen = mat->rmap.n; 768 range = mat->rmap.range; 769 for (i=1; i<size; i++) { 770 rlen = PetscMax(rlen,range[i+1] - range[i]); 771 } 772 } else { 773 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 774 rlen = mat->rmap.n; 775 } 776 777 /* load up the local row counts */ 778 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 779 for (i=0; i<mat->rmap.n; i++) { 780 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 781 } 782 783 /* store the row lengths to the file */ 784 if (!rank) { 785 MPI_Status status; 786 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 787 for (i=1; i<size; i++) { 788 rlen = range[i+1] - range[i]; 789 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 790 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 791 } 792 } else { 793 ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 794 } 795 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 796 797 /* load up the local column indices */ 798 nzmax = nz; /* )th processor needs space a largest processor needs */ 799 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 800 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 801 cnt = 0; 802 for (i=0; i<mat->rmap.n; i++) { 803 for (j=B->i[i]; j<B->i[i+1]; j++) { 804 if ( (col = garray[B->j[j]]) > cstart) break; 805 column_indices[cnt++] = col; 806 } 807 for (k=A->i[i]; k<A->i[i+1]; k++) { 808 column_indices[cnt++] = A->j[k] + cstart; 809 } 810 for (; j<B->i[i+1]; j++) { 811 column_indices[cnt++] = garray[B->j[j]]; 812 } 813 } 814 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 815 816 /* store the column indices to the file */ 817 if (!rank) { 818 MPI_Status status; 819 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 820 for (i=1; i<size; i++) { 821 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 822 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 823 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 824 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 825 } 826 } else { 827 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 828 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 829 } 830 ierr = PetscFree(column_indices);CHKERRQ(ierr); 831 832 /* load up the local column values */ 833 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 834 cnt = 0; 835 for (i=0; i<mat->rmap.n; i++) { 836 for (j=B->i[i]; j<B->i[i+1]; j++) { 837 if ( garray[B->j[j]] > cstart) break; 838 column_values[cnt++] = B->a[j]; 839 } 840 for (k=A->i[i]; k<A->i[i+1]; k++) { 841 column_values[cnt++] = A->a[k]; 842 } 843 for (; j<B->i[i+1]; j++) { 844 column_values[cnt++] = B->a[j]; 845 } 846 } 847 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 848 849 /* store the column values to the file */ 850 if (!rank) { 851 MPI_Status status; 852 ierr = 
PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 853 for (i=1; i<size; i++) { 854 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 855 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 856 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 857 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 858 } 859 } else { 860 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 861 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 862 } 863 ierr = PetscFree(column_values);CHKERRQ(ierr); 864 PetscFunctionReturn(0); 865 } 866 867 #undef __FUNCT__ 868 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 869 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 870 { 871 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 872 PetscErrorCode ierr; 873 PetscMPIInt rank = aij->rank,size = aij->size; 874 PetscTruth isdraw,iascii,isbinary; 875 PetscViewer sviewer; 876 PetscViewerFormat format; 877 878 PetscFunctionBegin; 879 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 880 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 881 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 882 if (iascii) { 883 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 884 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 885 MatInfo info; 886 PetscTruth inodes; 887 888 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 889 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 890 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 891 if (!inodes) { 892 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 893 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 894 } else { 895 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 896 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 897 } 898 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 899 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 900 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 901 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 902 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 903 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 904 PetscFunctionReturn(0); 905 } else if (format == PETSC_VIEWER_ASCII_INFO) { 906 PetscInt inodecount,inodelimit,*inodes; 907 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 908 if (inodes) { 909 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 910 } else { 911 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 912 } 913 PetscFunctionReturn(0); 914 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 915 PetscFunctionReturn(0); 916 } 917 } else if (isbinary) { 918 if (size == 1) { 919 ierr = 
PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 920 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 921 } else { 922 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 923 } 924 PetscFunctionReturn(0); 925 } else if (isdraw) { 926 PetscDraw draw; 927 PetscTruth isnull; 928 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 929 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 930 } 931 932 if (size == 1) { 933 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 934 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 935 } else { 936 /* assemble the entire matrix onto first processor. */ 937 Mat A; 938 Mat_SeqAIJ *Aloc; 939 PetscInt M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct; 940 PetscScalar *a; 941 942 ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 943 if (!rank) { 944 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 945 } else { 946 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 947 } 948 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 949 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 950 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 951 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 952 953 /* copy over the A part */ 954 Aloc = (Mat_SeqAIJ*)aij->A->data; 955 m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 956 row = mat->rmap.rstart; 957 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;} 958 for (i=0; i<m; i++) { 959 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 960 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 961 } 962 aj = Aloc->j; 963 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;} 964 965 /* copy over the B part */ 966 Aloc = (Mat_SeqAIJ*)aij->B->data; 967 m = aij->B->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 968 row = mat->rmap.rstart; 969 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 970 ct = cols; 971 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 972 for (i=0; i<m; i++) { 973 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 974 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 975 } 976 ierr = PetscFree(ct);CHKERRQ(ierr); 977 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 978 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 979 /* 980 Everyone has to call to draw the matrix since the graphics waits are 981 synchronized across all processors that share the PetscDraw object 982 */ 983 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 984 if (!rank) { 985 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 986 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 987 } 988 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 989 ierr = MatDestroy(A);CHKERRQ(ierr); 990 } 991 PetscFunctionReturn(0); 992 } 993 994 #undef __FUNCT__ 995 #define __FUNCT__ "MatView_MPIAIJ" 996 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 997 { 998 PetscErrorCode ierr; 999 PetscTruth iascii,isdraw,issocket,isbinary; 1000 1001 PetscFunctionBegin; 1002 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1003 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1004 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1005 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 
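  /* Note (descriptive comment, added): the ASCII, draw, socket, and binary viewers all
     funnel into MatView_MPIAIJ_ASCIIorDraworSocket() below; any other viewer
     type is rejected with PETSC_ERR_SUP. */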
1006 if (iascii || isdraw || isbinary || issocket) { 1007 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1008 } else { 1009 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1010 } 1011 PetscFunctionReturn(0); 1012 } 1013 1014 1015 1016 #undef __FUNCT__ 1017 #define __FUNCT__ "MatRelax_MPIAIJ" 1018 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1019 { 1020 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1021 PetscErrorCode ierr; 1022 Vec bb1; 1023 1024 PetscFunctionBegin; 1025 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1026 1027 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1028 1029 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1030 if (flag & SOR_ZERO_INITIAL_GUESS) { 1031 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1032 its--; 1033 } 1034 1035 while (its--) { 1036 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1037 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1038 1039 /* update rhs: bb1 = bb - B*x */ 1040 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1041 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1042 1043 /* local sweep */ 1044 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1045 CHKERRQ(ierr); 1046 } 1047 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1048 if (flag & SOR_ZERO_INITIAL_GUESS) { 1049 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1050 its--; 1051 } 1052 while (its--) { 1053 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1054 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1055 1056 /* update rhs: bb1 = bb - B*x */ 1057 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1058 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1059 1060 /* local sweep */ 1061 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1062 CHKERRQ(ierr); 1063 } 1064 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1065 if (flag & SOR_ZERO_INITIAL_GUESS) { 1066 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1067 its--; 1068 } 1069 while (its--) { 1070 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1071 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1072 1073 /* update rhs: bb1 = bb - B*x */ 1074 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1075 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1076 1077 /* local sweep */ 1078 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1079 CHKERRQ(ierr); 1080 } 1081 } else { 1082 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1083 } 1084 1085 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1086 PetscFunctionReturn(0); 1087 } 1088 1089 #undef __FUNCT__ 1090 #define __FUNCT__ "MatPermute_MPIAIJ" 1091 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1092 { 1093 MPI_Comm comm,pcomm; 1094 PetscInt first,local_size,nrows,*rows; 1095 int ntids; 1096 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1097 PetscErrorCode 
ierr; 1098 1099 PetscFunctionBegin; 1100 ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr); 1101 /* make a collective version of 'rowp' */ 1102 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr); 1103 if (pcomm==comm) { 1104 crowp = rowp; 1105 } else { 1106 ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr); 1107 ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr); 1108 ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr); 1109 ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr); 1110 } 1111 /* collect the global row permutation and invert it */ 1112 ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr); 1113 ierr = ISSetPermutation(growp); CHKERRQ(ierr); 1114 if (pcomm!=comm) { 1115 ierr = ISDestroy(crowp); CHKERRQ(ierr); 1116 } 1117 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1118 /* get the local target indices */ 1119 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr); 1120 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr); 1121 ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr); 1122 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr); 1123 ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr); 1124 ierr = ISDestroy(irowp); CHKERRQ(ierr); 1125 /* the column permutation is so much easier; 1126 make a local version of 'colp' and invert it */ 1127 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr); 1128 ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr); 1129 if (ntids==1) { 1130 lcolp = colp; 1131 } else { 1132 ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr); 1133 ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr); 1134 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr); 1135 } 1136 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr); 1137 ierr = ISSetPermutation(lcolp); CHKERRQ(ierr); 1138 if (ntids>1) { 1139 ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr); 1140 ierr = ISDestroy(lcolp); CHKERRQ(ierr); 1141 } 1142 /* now we just get the submatrix */ 1143 ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr); 1144 /* clean up */ 1145 ierr = ISDestroy(lrowp); CHKERRQ(ierr); 1146 ierr = ISDestroy(icolp); CHKERRQ(ierr); 1147 PetscFunctionReturn(0); 1148 } 1149 1150 #undef __FUNCT__ 1151 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1152 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1153 { 1154 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1155 Mat A = mat->A,B = mat->B; 1156 PetscErrorCode ierr; 1157 PetscReal isend[5],irecv[5]; 1158 1159 PetscFunctionBegin; 1160 info->block_size = 1.0; 1161 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1162 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1163 isend[3] = info->memory; isend[4] = info->mallocs; 1164 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1165 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1166 isend[3] += info->memory; isend[4] += info->mallocs; 1167 if (flag == MAT_LOCAL) { 1168 info->nz_used = isend[0]; 1169 info->nz_allocated = isend[1]; 1170 info->nz_unneeded = isend[2]; 1171 info->memory = isend[3]; 1172 info->mallocs = isend[4]; 1173 } else if (flag == MAT_GLOBAL_MAX) { 1174 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1175 info->nz_used = irecv[0]; 1176 info->nz_allocated = irecv[1]; 1177 info->nz_unneeded = irecv[2]; 1178 info->memory = irecv[3]; 1179 info->mallocs = irecv[4]; 1180 } else if (flag == 
MAT_GLOBAL_SUM) { 1181 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1182 info->nz_used = irecv[0]; 1183 info->nz_allocated = irecv[1]; 1184 info->nz_unneeded = irecv[2]; 1185 info->memory = irecv[3]; 1186 info->mallocs = irecv[4]; 1187 } 1188 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1189 info->fill_ratio_needed = 0; 1190 info->factor_mallocs = 0; 1191 info->rows_global = (double)matin->rmap.N; 1192 info->columns_global = (double)matin->cmap.N; 1193 info->rows_local = (double)matin->rmap.n; 1194 info->columns_local = (double)matin->cmap.N; 1195 1196 PetscFunctionReturn(0); 1197 } 1198 1199 #undef __FUNCT__ 1200 #define __FUNCT__ "MatSetOption_MPIAIJ" 1201 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op) 1202 { 1203 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1204 PetscErrorCode ierr; 1205 1206 PetscFunctionBegin; 1207 switch (op) { 1208 case MAT_NO_NEW_NONZERO_LOCATIONS: 1209 case MAT_YES_NEW_NONZERO_LOCATIONS: 1210 case MAT_COLUMNS_UNSORTED: 1211 case MAT_COLUMNS_SORTED: 1212 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1213 case MAT_KEEP_ZEROED_ROWS: 1214 case MAT_NEW_NONZERO_LOCATION_ERR: 1215 case MAT_USE_INODES: 1216 case MAT_DO_NOT_USE_INODES: 1217 case MAT_IGNORE_ZERO_ENTRIES: 1218 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1219 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1220 break; 1221 case MAT_ROW_ORIENTED: 1222 a->roworiented = PETSC_TRUE; 1223 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1224 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1225 break; 1226 case MAT_ROWS_SORTED: 1227 case MAT_ROWS_UNSORTED: 1228 case MAT_YES_NEW_DIAGONALS: 1229 ierr = PetscInfo(A,"Option ignored\n");CHKERRQ(ierr); 1230 break; 1231 case MAT_COLUMN_ORIENTED: 1232 a->roworiented = PETSC_FALSE; 1233 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1234 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1235 break; 1236 case MAT_IGNORE_OFF_PROC_ENTRIES: 1237 a->donotstash = PETSC_TRUE; 1238 break; 1239 case MAT_NO_NEW_DIAGONALS: 1240 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1241 case MAT_SYMMETRIC: 1242 case MAT_STRUCTURALLY_SYMMETRIC: 1243 case MAT_HERMITIAN: 1244 case MAT_SYMMETRY_ETERNAL: 1245 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1246 break; 1247 case MAT_NOT_SYMMETRIC: 1248 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1249 case MAT_NOT_HERMITIAN: 1250 case MAT_NOT_SYMMETRY_ETERNAL: 1251 break; 1252 default: 1253 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1254 } 1255 PetscFunctionReturn(0); 1256 } 1257 1258 #undef __FUNCT__ 1259 #define __FUNCT__ "MatGetRow_MPIAIJ" 1260 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1261 { 1262 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1263 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1264 PetscErrorCode ierr; 1265 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart; 1266 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend; 1267 PetscInt *cmap,*idx_p; 1268 1269 PetscFunctionBegin; 1270 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1271 mat->getrowactive = PETSC_TRUE; 1272 1273 if (!mat->rowvalues && (idx || v)) { 1274 /* 1275 allocate enough space to hold information from the longest row. 
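       A single allocation is used: the scalar values come first and the
       integer column indices are placed immediately after them
       (mat->rowindices points just past the scalar section).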
1276 */ 1277 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1278 PetscInt max = 1,tmp; 1279 for (i=0; i<matin->rmap.n; i++) { 1280 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1281 if (max < tmp) { max = tmp; } 1282 } 1283 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1284 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1285 } 1286 1287 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1288 lrow = row - rstart; 1289 1290 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1291 if (!v) {pvA = 0; pvB = 0;} 1292 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1293 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1294 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1295 nztot = nzA + nzB; 1296 1297 cmap = mat->garray; 1298 if (v || idx) { 1299 if (nztot) { 1300 /* Sort by increasing column numbers, assuming A and B already sorted */ 1301 PetscInt imark = -1; 1302 if (v) { 1303 *v = v_p = mat->rowvalues; 1304 for (i=0; i<nzB; i++) { 1305 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1306 else break; 1307 } 1308 imark = i; 1309 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1310 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1311 } 1312 if (idx) { 1313 *idx = idx_p = mat->rowindices; 1314 if (imark > -1) { 1315 for (i=0; i<imark; i++) { 1316 idx_p[i] = cmap[cworkB[i]]; 1317 } 1318 } else { 1319 for (i=0; i<nzB; i++) { 1320 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1321 else break; 1322 } 1323 imark = i; 1324 } 1325 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1326 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1327 } 1328 } else { 1329 if (idx) *idx = 0; 1330 if (v) *v = 0; 1331 } 1332 } 1333 *nz = nztot; 1334 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1335 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1336 PetscFunctionReturn(0); 1337 } 1338 1339 #undef __FUNCT__ 1340 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1341 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1342 { 1343 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1344 1345 PetscFunctionBegin; 1346 if (!aij->getrowactive) { 1347 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1348 } 1349 aij->getrowactive = PETSC_FALSE; 1350 PetscFunctionReturn(0); 1351 } 1352 1353 #undef __FUNCT__ 1354 #define __FUNCT__ "MatNorm_MPIAIJ" 1355 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1356 { 1357 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1358 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1359 PetscErrorCode ierr; 1360 PetscInt i,j,cstart = mat->cmap.rstart; 1361 PetscReal sum = 0.0; 1362 PetscScalar *v; 1363 1364 PetscFunctionBegin; 1365 if (aij->size == 1) { 1366 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1367 } else { 1368 if (type == NORM_FROBENIUS) { 1369 v = amat->a; 1370 for (i=0; i<amat->nz; i++) { 1371 #if defined(PETSC_USE_COMPLEX) 1372 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1373 #else 1374 sum += (*v)*(*v); v++; 1375 #endif 1376 } 1377 v = bmat->a; 1378 for (i=0; i<bmat->nz; i++) { 1379 #if defined(PETSC_USE_COMPLEX) 1380 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1381 #else 1382 sum += (*v)*(*v); v++; 1383 #endif 1384 } 1385 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1386 *norm = sqrt(*norm); 
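      /* i.e. ||A||_F = sqrt( sum over all processes of the local sum of |a_ij|^2 ),
         with each local contribution taken from both the diagonal (A) and
         off-diagonal (B) blocks. */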
1387 } else if (type == NORM_1) { /* max column norm */ 1388 PetscReal *tmp,*tmp2; 1389 PetscInt *jj,*garray = aij->garray; 1390 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1391 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1392 ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 1393 *norm = 0.0; 1394 v = amat->a; jj = amat->j; 1395 for (j=0; j<amat->nz; j++) { 1396 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1397 } 1398 v = bmat->a; jj = bmat->j; 1399 for (j=0; j<bmat->nz; j++) { 1400 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1401 } 1402 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1403 for (j=0; j<mat->cmap.N; j++) { 1404 if (tmp2[j] > *norm) *norm = tmp2[j]; 1405 } 1406 ierr = PetscFree(tmp);CHKERRQ(ierr); 1407 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1408 } else if (type == NORM_INFINITY) { /* max row norm */ 1409 PetscReal ntemp = 0.0; 1410 for (j=0; j<aij->A->rmap.n; j++) { 1411 v = amat->a + amat->i[j]; 1412 sum = 0.0; 1413 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1414 sum += PetscAbsScalar(*v); v++; 1415 } 1416 v = bmat->a + bmat->i[j]; 1417 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1418 sum += PetscAbsScalar(*v); v++; 1419 } 1420 if (sum > ntemp) ntemp = sum; 1421 } 1422 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1423 } else { 1424 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1425 } 1426 } 1427 PetscFunctionReturn(0); 1428 } 1429 1430 #undef __FUNCT__ 1431 #define __FUNCT__ "MatTranspose_MPIAIJ" 1432 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1433 { 1434 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1435 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1436 PetscErrorCode ierr; 1437 PetscInt M = A->rmap.N,N = A->cmap.N,m,*ai,*aj,row,*cols,i,*ct; 1438 Mat B; 1439 PetscScalar *array; 1440 1441 PetscFunctionBegin; 1442 if (!matout && M != N) { 1443 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1444 } 1445 1446 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 1447 ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr); 1448 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1449 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1450 1451 /* copy over the A part */ 1452 Aloc = (Mat_SeqAIJ*)a->A->data; 1453 m = a->A->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1454 row = A->rmap.rstart; 1455 for (i=0; i<ai[m]; i++) {aj[i] += A->cmap.rstart ;} 1456 for (i=0; i<m; i++) { 1457 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1458 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1459 } 1460 aj = Aloc->j; 1461 for (i=0; i<ai[m]; i++) {aj[i] -= A->cmap.rstart ;} 1462 1463 /* copy over the B part */ 1464 Aloc = (Mat_SeqAIJ*)a->B->data; 1465 m = a->B->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1466 row = A->rmap.rstart; 1467 ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1468 ct = cols; 1469 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1470 for (i=0; i<m; i++) { 1471 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1472 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1473 } 1474 ierr = PetscFree(ct);CHKERRQ(ierr); 1475 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1476 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1477 if (matout) { 1478 *matout = B; 1479 } else { 1480 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1481 } 1482 
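  /* In the in-place case MatHeaderCopy() has replaced the contents of A with
     the freshly assembled transpose B and released B's header. */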
PetscFunctionReturn(0); 1483 } 1484 1485 #undef __FUNCT__ 1486 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1487 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1488 { 1489 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1490 Mat a = aij->A,b = aij->B; 1491 PetscErrorCode ierr; 1492 PetscInt s1,s2,s3; 1493 1494 PetscFunctionBegin; 1495 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1496 if (rr) { 1497 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1498 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1499 /* Overlap communication with computation. */ 1500 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1501 } 1502 if (ll) { 1503 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1504 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1505 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1506 } 1507 /* scale the diagonal block */ 1508 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1509 1510 if (rr) { 1511 /* Do a scatter end and then right scale the off-diagonal block */ 1512 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1513 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1514 } 1515 1516 PetscFunctionReturn(0); 1517 } 1518 1519 1520 #undef __FUNCT__ 1521 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1522 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A) 1523 { 1524 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1525 PetscErrorCode ierr; 1526 1527 PetscFunctionBegin; 1528 if (!a->rank) { 1529 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1530 } 1531 PetscFunctionReturn(0); 1532 } 1533 1534 #undef __FUNCT__ 1535 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1536 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1537 { 1538 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1539 PetscErrorCode ierr; 1540 1541 PetscFunctionBegin; 1542 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1543 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1544 PetscFunctionReturn(0); 1545 } 1546 #undef __FUNCT__ 1547 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1548 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1549 { 1550 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1551 PetscErrorCode ierr; 1552 1553 PetscFunctionBegin; 1554 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1555 PetscFunctionReturn(0); 1556 } 1557 1558 #undef __FUNCT__ 1559 #define __FUNCT__ "MatEqual_MPIAIJ" 1560 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1561 { 1562 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1563 Mat a,b,c,d; 1564 PetscTruth flg; 1565 PetscErrorCode ierr; 1566 1567 PetscFunctionBegin; 1568 a = matA->A; b = matA->B; 1569 c = matB->A; d = matB->B; 1570 1571 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1572 if (flg) { 1573 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1574 } 1575 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1576 PetscFunctionReturn(0); 1577 } 1578 1579 #undef __FUNCT__ 1580 #define __FUNCT__ "MatCopy_MPIAIJ" 1581 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1582 { 1583 PetscErrorCode ierr; 1584 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1585 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1586 1587 PetscFunctionBegin; 1588 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
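     In that case, and whenever the nonzero patterns are not known to match,
     we fall back to the generic MatCopy_Basic() path below.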
*/ 1589 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1590 /* because of the column compression in the off-processor part of the matrix a->B, 1591 the number of columns in a->B and b->B may be different, hence we cannot call 1592 the MatCopy() directly on the two parts. If need be, we can provide a more 1593 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1594 then copying the submatrices */ 1595 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1596 } else { 1597 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1598 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1599 } 1600 PetscFunctionReturn(0); 1601 } 1602 1603 #undef __FUNCT__ 1604 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1605 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1606 { 1607 PetscErrorCode ierr; 1608 1609 PetscFunctionBegin; 1610 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1611 PetscFunctionReturn(0); 1612 } 1613 1614 #include "petscblaslapack.h" 1615 #undef __FUNCT__ 1616 #define __FUNCT__ "MatAXPY_MPIAIJ" 1617 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1618 { 1619 PetscErrorCode ierr; 1620 PetscInt i; 1621 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1622 PetscBLASInt bnz,one=1; 1623 Mat_SeqAIJ *x,*y; 1624 1625 PetscFunctionBegin; 1626 if (str == SAME_NONZERO_PATTERN) { 1627 PetscScalar alpha = a; 1628 x = (Mat_SeqAIJ *)xx->A->data; 1629 y = (Mat_SeqAIJ *)yy->A->data; 1630 bnz = (PetscBLASInt)x->nz; 1631 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1632 x = (Mat_SeqAIJ *)xx->B->data; 1633 y = (Mat_SeqAIJ *)yy->B->data; 1634 bnz = (PetscBLASInt)x->nz; 1635 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1636 } else if (str == SUBSET_NONZERO_PATTERN) { 1637 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1638 1639 x = (Mat_SeqAIJ *)xx->B->data; 1640 y = (Mat_SeqAIJ *)yy->B->data; 1641 if (y->xtoy && y->XtoY != xx->B) { 1642 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1643 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1644 } 1645 if (!y->xtoy) { /* get xtoy */ 1646 ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1647 y->XtoY = xx->B; 1648 } 1649 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1650 } else { 1651 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1652 } 1653 PetscFunctionReturn(0); 1654 } 1655 1656 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1657 1658 #undef __FUNCT__ 1659 #define __FUNCT__ "MatConjugate_MPIAIJ" 1660 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1661 { 1662 #if defined(PETSC_USE_COMPLEX) 1663 PetscErrorCode ierr; 1664 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1665 1666 PetscFunctionBegin; 1667 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1668 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1669 #else 1670 PetscFunctionBegin; 1671 #endif 1672 PetscFunctionReturn(0); 1673 } 1674 1675 #undef __FUNCT__ 1676 #define __FUNCT__ "MatRealPart_MPIAIJ" 1677 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1678 { 1679 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1680 PetscErrorCode ierr; 1681 1682 PetscFunctionBegin; 1683 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1684 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1685 PetscFunctionReturn(0); 1686 } 1687 1688 #undef __FUNCT__ 1689 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1690 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1691 { 1692 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1693 PetscErrorCode ierr; 1694 1695 
PetscFunctionBegin; 1696 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1697 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1698 PetscFunctionReturn(0); 1699 } 1700 1701 /* -------------------------------------------------------------------*/ 1702 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1703 MatGetRow_MPIAIJ, 1704 MatRestoreRow_MPIAIJ, 1705 MatMult_MPIAIJ, 1706 /* 4*/ MatMultAdd_MPIAIJ, 1707 MatMultTranspose_MPIAIJ, 1708 MatMultTransposeAdd_MPIAIJ, 1709 0, 1710 0, 1711 0, 1712 /*10*/ 0, 1713 0, 1714 0, 1715 MatRelax_MPIAIJ, 1716 MatTranspose_MPIAIJ, 1717 /*15*/ MatGetInfo_MPIAIJ, 1718 MatEqual_MPIAIJ, 1719 MatGetDiagonal_MPIAIJ, 1720 MatDiagonalScale_MPIAIJ, 1721 MatNorm_MPIAIJ, 1722 /*20*/ MatAssemblyBegin_MPIAIJ, 1723 MatAssemblyEnd_MPIAIJ, 1724 0, 1725 MatSetOption_MPIAIJ, 1726 MatZeroEntries_MPIAIJ, 1727 /*25*/ MatZeroRows_MPIAIJ, 1728 0, 1729 0, 1730 0, 1731 0, 1732 /*30*/ MatSetUpPreallocation_MPIAIJ, 1733 0, 1734 0, 1735 0, 1736 0, 1737 /*35*/ MatDuplicate_MPIAIJ, 1738 0, 1739 0, 1740 0, 1741 0, 1742 /*40*/ MatAXPY_MPIAIJ, 1743 MatGetSubMatrices_MPIAIJ, 1744 MatIncreaseOverlap_MPIAIJ, 1745 MatGetValues_MPIAIJ, 1746 MatCopy_MPIAIJ, 1747 /*45*/ MatPrintHelp_MPIAIJ, 1748 MatScale_MPIAIJ, 1749 0, 1750 0, 1751 0, 1752 /*50*/ MatSetBlockSize_MPIAIJ, 1753 0, 1754 0, 1755 0, 1756 0, 1757 /*55*/ MatFDColoringCreate_MPIAIJ, 1758 0, 1759 MatSetUnfactored_MPIAIJ, 1760 MatPermute_MPIAIJ, 1761 0, 1762 /*60*/ MatGetSubMatrix_MPIAIJ, 1763 MatDestroy_MPIAIJ, 1764 MatView_MPIAIJ, 1765 0, 1766 0, 1767 /*65*/ 0, 1768 0, 1769 0, 1770 0, 1771 0, 1772 /*70*/ 0, 1773 0, 1774 MatSetColoring_MPIAIJ, 1775 #if defined(PETSC_HAVE_ADIC) 1776 MatSetValuesAdic_MPIAIJ, 1777 #else 1778 0, 1779 #endif 1780 MatSetValuesAdifor_MPIAIJ, 1781 /*75*/ 0, 1782 0, 1783 0, 1784 0, 1785 0, 1786 /*80*/ 0, 1787 0, 1788 0, 1789 0, 1790 /*84*/ MatLoad_MPIAIJ, 1791 0, 1792 0, 1793 0, 1794 0, 1795 0, 1796 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1797 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1798 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1799 MatPtAP_Basic, 1800 MatPtAPSymbolic_MPIAIJ, 1801 /*95*/ MatPtAPNumeric_MPIAIJ, 1802 0, 1803 0, 1804 0, 1805 0, 1806 /*100*/0, 1807 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1808 MatPtAPNumeric_MPIAIJ_MPIAIJ, 1809 MatConjugate_MPIAIJ, 1810 0, 1811 /*105*/MatSetValuesRow_MPIAIJ, 1812 MatRealPart_MPIAIJ, 1813 MatImaginaryPart_MPIAIJ}; 1814 1815 /* ----------------------------------------------------------------------------------------*/ 1816 1817 EXTERN_C_BEGIN 1818 #undef __FUNCT__ 1819 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1820 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 1821 { 1822 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1823 PetscErrorCode ierr; 1824 1825 PetscFunctionBegin; 1826 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1827 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1828 PetscFunctionReturn(0); 1829 } 1830 EXTERN_C_END 1831 1832 EXTERN_C_BEGIN 1833 #undef __FUNCT__ 1834 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1835 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 1836 { 1837 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1838 PetscErrorCode ierr; 1839 1840 PetscFunctionBegin; 1841 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1842 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1843 PetscFunctionReturn(0); 1844 } 1845 EXTERN_C_END 1846 1847 #include "petscpc.h" 1848 EXTERN_C_BEGIN 1849 #undef __FUNCT__ 1850 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1851 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const 
PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1852 { 1853 Mat_MPIAIJ *b; 1854 PetscErrorCode ierr; 1855 PetscInt i; 1856 1857 PetscFunctionBegin; 1858 B->preallocated = PETSC_TRUE; 1859 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1860 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1861 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1862 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1863 1864 B->rmap.bs = B->cmap.bs = 1; 1865 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 1866 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 1867 if (d_nnz) { 1868 for (i=0; i<B->rmap.n; i++) { 1869 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1870 } 1871 } 1872 if (o_nnz) { 1873 for (i=0; i<B->rmap.n; i++) { 1874 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1875 } 1876 } 1877 b = (Mat_MPIAIJ*)B->data; 1878 1879 /* Explicitly create 2 MATSEQAIJ matrices. */ 1880 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1881 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 1882 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 1883 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 1884 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1885 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 1886 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 1887 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 1888 1889 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1890 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1891 1892 PetscFunctionReturn(0); 1893 } 1894 EXTERN_C_END 1895 1896 #undef __FUNCT__ 1897 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1898 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1899 { 1900 Mat mat; 1901 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1902 PetscErrorCode ierr; 1903 1904 PetscFunctionBegin; 1905 *newmat = 0; 1906 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 1907 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 1908 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1909 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1910 a = (Mat_MPIAIJ*)mat->data; 1911 1912 mat->factor = matin->factor; 1913 mat->rmap.bs = matin->rmap.bs; 1914 mat->assembled = PETSC_TRUE; 1915 mat->insertmode = NOT_SET_VALUES; 1916 mat->preallocated = PETSC_TRUE; 1917 1918 a->size = oldmat->size; 1919 a->rank = oldmat->rank; 1920 a->donotstash = oldmat->donotstash; 1921 a->roworiented = oldmat->roworiented; 1922 a->rowindices = 0; 1923 a->rowvalues = 0; 1924 a->getrowactive = PETSC_FALSE; 1925 1926 ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 1927 ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 1928 1929 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1930 if (oldmat->colmap) { 1931 #if defined (PETSC_USE_CTABLE) 1932 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1933 #else 1934 ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1935 ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 1936 ierr = 
PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 1937 #endif 1938 } else a->colmap = 0; 1939 if (oldmat->garray) { 1940 PetscInt len; 1941 len = oldmat->B->cmap.n; 1942 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1943 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 1944 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 1945 } else a->garray = 0; 1946 1947 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1948 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 1949 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1950 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 1951 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1952 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1953 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1954 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 1955 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1956 *newmat = mat; 1957 PetscFunctionReturn(0); 1958 } 1959 1960 #include "petscsys.h" 1961 1962 #undef __FUNCT__ 1963 #define __FUNCT__ "MatLoad_MPIAIJ" 1964 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat) 1965 { 1966 Mat A; 1967 PetscScalar *vals,*svals; 1968 MPI_Comm comm = ((PetscObject)viewer)->comm; 1969 MPI_Status status; 1970 PetscErrorCode ierr; 1971 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 1972 PetscInt i,nz,j,rstart,rend,mmax; 1973 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 1974 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1975 PetscInt cend,cstart,n,*rowners; 1976 int fd; 1977 1978 PetscFunctionBegin; 1979 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1980 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1981 if (!rank) { 1982 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1983 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1984 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1985 } 1986 1987 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1988 M = header[1]; N = header[2]; 1989 /* determine ownership of all rows */ 1990 m = M/size + ((M % size) > rank); 1991 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1992 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1993 1994 /* First process needs enough room for process with most rows */ 1995 if (!rank) { 1996 mmax = rowners[1]; 1997 for (i=2; i<size; i++) { 1998 mmax = PetscMax(mmax,rowners[i]); 1999 } 2000 } else mmax = m; 2001 2002 rowners[0] = 0; 2003 for (i=2; i<=size; i++) { 2004 mmax = PetscMax(mmax,rowners[i]); 2005 rowners[i] += rowners[i-1]; 2006 } 2007 rstart = rowners[rank]; 2008 rend = rowners[rank+1]; 2009 2010 /* distribute row lengths to all processors */ 2011 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2012 if (!rank) { 2013 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2014 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2015 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2016 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2017 for (j=0; j<m; j++) { 2018 procsnz[0] += ourlens[j]; 2019 } 2020 for (i=1; i<size; i++) { 2021 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2022 /* calculate the number of nonzeros on each 
processor */ 2023 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2024 procsnz[i] += rowlengths[j]; 2025 } 2026 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2027 } 2028 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2029 } else { 2030 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2031 } 2032 2033 if (!rank) { 2034 /* determine max buffer needed and allocate it */ 2035 maxnz = 0; 2036 for (i=0; i<size; i++) { 2037 maxnz = PetscMax(maxnz,procsnz[i]); 2038 } 2039 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2040 2041 /* read in my part of the matrix column indices */ 2042 nz = procsnz[0]; 2043 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2044 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2045 2046 /* read in every one elses and ship off */ 2047 for (i=1; i<size; i++) { 2048 nz = procsnz[i]; 2049 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2050 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2051 } 2052 ierr = PetscFree(cols);CHKERRQ(ierr); 2053 } else { 2054 /* determine buffer space needed for message */ 2055 nz = 0; 2056 for (i=0; i<m; i++) { 2057 nz += ourlens[i]; 2058 } 2059 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2060 2061 /* receive message of column indices*/ 2062 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2063 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2064 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2065 } 2066 2067 /* determine column ownership if matrix is not square */ 2068 if (N != M) { 2069 n = N/size + ((N % size) > rank); 2070 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2071 cstart = cend - n; 2072 } else { 2073 cstart = rstart; 2074 cend = rend; 2075 n = cend - cstart; 2076 } 2077 2078 /* loop over local rows, determining number of off diagonal entries */ 2079 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2080 jj = 0; 2081 for (i=0; i<m; i++) { 2082 for (j=0; j<ourlens[i]; j++) { 2083 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2084 jj++; 2085 } 2086 } 2087 2088 /* create our matrix */ 2089 for (i=0; i<m; i++) { 2090 ourlens[i] -= offlens[i]; 2091 } 2092 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2093 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2094 ierr = MatSetType(A,type);CHKERRQ(ierr); 2095 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2096 2097 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2098 for (i=0; i<m; i++) { 2099 ourlens[i] += offlens[i]; 2100 } 2101 2102 if (!rank) { 2103 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2104 2105 /* read in my part of the matrix numerical values */ 2106 nz = procsnz[0]; 2107 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2108 2109 /* insert into matrix */ 2110 jj = rstart; 2111 smycols = mycols; 2112 svals = vals; 2113 for (i=0; i<m; i++) { 2114 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2115 smycols += ourlens[i]; 2116 svals += ourlens[i]; 2117 jj++; 2118 } 2119 2120 /* read in other processors and ship out */ 2121 for (i=1; i<size; i++) { 2122 nz = procsnz[i]; 2123 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2124 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2125 } 2126 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2127 } else { 2128 /* receive numeric values */ 2129 ierr = 
PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2130 2131 /* receive message of values*/ 2132 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2133 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2134 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2135 2136 /* insert into matrix */ 2137 jj = rstart; 2138 smycols = mycols; 2139 svals = vals; 2140 for (i=0; i<m; i++) { 2141 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2142 smycols += ourlens[i]; 2143 svals += ourlens[i]; 2144 jj++; 2145 } 2146 } 2147 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2148 ierr = PetscFree(vals);CHKERRQ(ierr); 2149 ierr = PetscFree(mycols);CHKERRQ(ierr); 2150 ierr = PetscFree(rowners);CHKERRQ(ierr); 2151 2152 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2153 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2154 *newmat = A; 2155 PetscFunctionReturn(0); 2156 } 2157 2158 #undef __FUNCT__ 2159 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2160 /* 2161 Not great since it makes two copies of the submatrix, first an SeqAIJ 2162 in local and then by concatenating the local matrices the end result. 2163 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2164 */ 2165 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2166 { 2167 PetscErrorCode ierr; 2168 PetscMPIInt rank,size; 2169 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2170 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2171 Mat *local,M,Mreuse; 2172 PetscScalar *vwork,*aa; 2173 MPI_Comm comm = mat->comm; 2174 Mat_SeqAIJ *aij; 2175 2176 2177 PetscFunctionBegin; 2178 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2179 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2180 2181 if (call == MAT_REUSE_MATRIX) { 2182 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2183 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2184 local = &Mreuse; 2185 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2186 } else { 2187 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2188 Mreuse = *local; 2189 ierr = PetscFree(local);CHKERRQ(ierr); 2190 } 2191 2192 /* 2193 m - number of local rows 2194 n - number of columns (same on all processors) 2195 rstart - first row in new global matrix generated 2196 */ 2197 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2198 if (call == MAT_INITIAL_MATRIX) { 2199 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2200 ii = aij->i; 2201 jj = aij->j; 2202 2203 /* 2204 Determine the number of non-zeros in the diagonal and off-diagonal 2205 portions of the matrix in order to do correct preallocation 2206 */ 2207 2208 /* first get start and end of "diagonal" columns */ 2209 if (csize == PETSC_DECIDE) { 2210 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2211 if (mglobal == n) { /* square matrix */ 2212 nlocal = m; 2213 } else { 2214 nlocal = n/size + ((n % size) > rank); 2215 } 2216 } else { 2217 nlocal = csize; 2218 } 2219 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2220 rstart = rend - nlocal; 2221 if (rank == size - 1 && rend != n) { 2222 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2223 } 2224 2225 /* next, compute all the lengths */ 2226 ierr = 
PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2227 olens = dlens + m; 2228 for (i=0; i<m; i++) { 2229 jend = ii[i+1] - ii[i]; 2230 olen = 0; 2231 dlen = 0; 2232 for (j=0; j<jend; j++) { 2233 if (*jj < rstart || *jj >= rend) olen++; 2234 else dlen++; 2235 jj++; 2236 } 2237 olens[i] = olen; 2238 dlens[i] = dlen; 2239 } 2240 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2241 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 2242 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2243 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2244 ierr = PetscFree(dlens);CHKERRQ(ierr); 2245 } else { 2246 PetscInt ml,nl; 2247 2248 M = *newmat; 2249 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2250 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2251 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2252 /* 2253 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2254 rather than the slower MatSetValues(). 2255 */ 2256 M->was_assembled = PETSC_TRUE; 2257 M->assembled = PETSC_FALSE; 2258 } 2259 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2260 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2261 ii = aij->i; 2262 jj = aij->j; 2263 aa = aij->a; 2264 for (i=0; i<m; i++) { 2265 row = rstart + i; 2266 nz = ii[i+1] - ii[i]; 2267 cwork = jj; jj += nz; 2268 vwork = aa; aa += nz; 2269 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2270 } 2271 2272 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2273 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2274 *newmat = M; 2275 2276 /* save submatrix used in processor for next request */ 2277 if (call == MAT_INITIAL_MATRIX) { 2278 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2279 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2280 } 2281 2282 PetscFunctionReturn(0); 2283 } 2284 2285 EXTERN_C_BEGIN 2286 #undef __FUNCT__ 2287 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2288 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2289 { 2290 PetscInt m,cstart, cend,j,nnz,i,d; 2291 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 2292 const PetscInt *JJ; 2293 PetscScalar *values; 2294 PetscErrorCode ierr; 2295 2296 PetscFunctionBegin; 2297 if (I[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"I[0] must be 0 it is %D",I[0]); 2298 2299 B->rmap.bs = B->cmap.bs = 1; 2300 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 2301 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 2302 m = B->rmap.n; 2303 cstart = B->cmap.rstart; 2304 cend = B->cmap.rend; 2305 rstart = B->rmap.rstart; 2306 2307 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2308 o_nnz = d_nnz + m; 2309 2310 for (i=0; i<m; i++) { 2311 nnz = I[i+1]- I[i]; 2312 JJ = J + I[i]; 2313 nnz_max = PetscMax(nnz_max,nnz); 2314 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 2315 for (j=0; j<nnz; j++) { 2316 if (*JJ >= cstart) break; 2317 JJ++; 2318 } 2319 d = 0; 2320 for (; j<nnz; j++) { 2321 if (*JJ++ >= cend) break; 2322 d++; 2323 } 2324 d_nnz[i] = d; 2325 o_nnz[i] = nnz - d; 2326 } 2327 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2328 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2329 2330 if (v) values = (PetscScalar*)v; 2331 else { 2332 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 
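    /* no numerical values were supplied: build a zeroed scratch buffer (nnz_max is the longest row) and insert the sparsity pattern below with explicit zeros so the nonzero locations are still allocated */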
2333 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2334 } 2335 2336 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2337 for (i=0; i<m; i++) { 2338 ii = i + rstart; 2339 nnz = I[i+1]- I[i]; 2340 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values+(v ? I[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2341 } 2342 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2343 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2344 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2345 2346 if (!v) { 2347 ierr = PetscFree(values);CHKERRQ(ierr); 2348 } 2349 PetscFunctionReturn(0); 2350 } 2351 EXTERN_C_END 2352 2353 #undef __FUNCT__ 2354 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2355 /*@ 2356 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2357 (the default parallel PETSc format). 2358 2359 Collective on MPI_Comm 2360 2361 Input Parameters: 2362 + B - the matrix 2363 . i - the indices into j for the start of each local row (starts with zero) 2364 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2365 - v - optional values in the matrix 2366 2367 Level: developer 2368 2369 .keywords: matrix, aij, compressed row, sparse, parallel 2370 2371 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2372 @*/ 2373 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2374 { 2375 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2376 2377 PetscFunctionBegin; 2378 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2379 if (f) { 2380 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2381 } 2382 PetscFunctionReturn(0); 2383 } 2384 2385 #undef __FUNCT__ 2386 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2387 /*@C 2388 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2389 (the default parallel PETSc format). For good matrix assembly performance 2390 the user should preallocate the matrix storage by setting the parameters 2391 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2392 performance can be increased by more than a factor of 50. 2393 2394 Collective on MPI_Comm 2395 2396 Input Parameters: 2397 + A - the matrix 2398 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2399 (same value is used for all local rows) 2400 . d_nnz - array containing the number of nonzeros in the various rows of the 2401 DIAGONAL portion of the local submatrix (possibly different for each row) 2402 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2403 The size of this array is equal to the number of local rows, i.e 'm'. 2404 You must leave room for the diagonal entry even if it is zero. 2405 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2406 submatrix (same value is used for all local rows). 2407 - o_nnz - array containing the number of nonzeros in the various rows of the 2408 OFF-DIAGONAL portion of the local submatrix (possibly different for 2409 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2410 structure. The size of this array is equal to the number 2411 of local rows, i.e 'm'. 
2412 2413 If the *_nnz parameter is given then the *_nz parameter is ignored 2414 2415 The AIJ format (also called the Yale sparse matrix format or 2416 compressed row storage (CSR)) is fully compatible with standard Fortran 77 2417 storage. The stored row and column indices begin with zero. See the users manual for details. 2418 2419 The parallel matrix is partitioned such that the first m0 rows belong to 2420 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2421 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2422 2423 The DIAGONAL portion of the local submatrix of a processor can be defined 2424 as the submatrix which is obtained by extracting the part corresponding 2425 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2426 first row that belongs to the processor, and r2 is the last row belonging 2427 to this processor. This is a square mxm matrix. The remaining portion 2428 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 2429 2430 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2431 2432 Example usage: 2433 2434 Consider the following 8x8 matrix with 34 non-zero values, that is 2435 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2436 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2437 as follows: 2438 2439 .vb 2440 1 2 0 | 0 3 0 | 0 4 2441 Proc0 0 5 6 | 7 0 0 | 8 0 2442 9 0 10 | 11 0 0 | 12 0 2443 ------------------------------------- 2444 13 0 14 | 15 16 17 | 0 0 2445 Proc1 0 18 0 | 19 20 21 | 0 0 2446 0 0 0 | 22 23 0 | 24 0 2447 ------------------------------------- 2448 Proc2 25 26 27 | 0 0 28 | 29 0 2449 30 0 0 | 31 32 33 | 0 34 2450 .ve 2451 2452 This can be represented as a collection of submatrices as: 2453 2454 .vb 2455 A B C 2456 D E F 2457 G H I 2458 .ve 2459 2460 Where the submatrices A,B,C are owned by proc0, D,E,F are 2461 owned by proc1, G,H,I are owned by proc2. 2462 2463 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2464 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2465 The 'M','N' parameters are 8,8, and have the same values on all procs. 2466 2467 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2468 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2469 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2470 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2471 part as SeqAIJ matrices. e.g., proc1 will store [E] as a SeqAIJ 2472 matrix, and [DF] as another SeqAIJ matrix. 2473 2474 When d_nz, o_nz parameters are specified, d_nz storage elements are 2475 allocated for every row of the local diagonal submatrix, and o_nz 2476 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2477 One way to choose d_nz and o_nz is to use the max nonzeros per local 2478 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2479 In this case, the values of d_nz,o_nz are: 2480 .vb 2481 proc0 : d_nz = 2, o_nz = 2 2482 proc1 : d_nz = 3, o_nz = 2 2483 proc2 : d_nz = 1, o_nz = 4 2484 .ve 2485 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2486 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2487 for proc2. i.e we are using 12+15+10=37 storage locations to store 2488 34 values. 2489 2490 When d_nnz, o_nnz parameters are specified, the storage is specified 2491 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2492 In the above case the values for d_nnz,o_nnz are: 2493 .vb 2494 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2495 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2496 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2497 .ve 2498 Here the space allocated is sum of all the above values i.e 34, and 2499 hence pre-allocation is perfect. 2500 2501 Level: intermediate 2502 2503 .keywords: matrix, aij, compressed row, sparse, parallel 2504 2505 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 2506 MPIAIJ 2507 @*/ 2508 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2509 { 2510 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2511 2512 PetscFunctionBegin; 2513 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2514 if (f) { 2515 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2516 } 2517 PetscFunctionReturn(0); 2518 } 2519 2520 #undef __FUNCT__ 2521 #define __FUNCT__ "MatCreateMPIAIJ" 2522 /*@C 2523 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2524 (the default parallel PETSc format). For good matrix assembly performance 2525 the user should preallocate the matrix storage by setting the parameters 2526 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2527 performance can be increased by more than a factor of 50. 2528 2529 Collective on MPI_Comm 2530 2531 Input Parameters: 2532 + comm - MPI communicator 2533 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2534 This value should be the same as the local size used in creating the 2535 y vector for the matrix-vector product y = Ax. 2536 . n - This value should be the same as the local size used in creating the 2537 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2538 calculated if N is given) For square matrices n is almost always m. 2539 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2540 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2541 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2542 (same value is used for all local rows) 2543 . d_nnz - array containing the number of nonzeros in the various rows of the 2544 DIAGONAL portion of the local submatrix (possibly different for each row) 2545 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2546 The size of this array is equal to the number of local rows, i.e 'm'. 2547 You must leave room for the diagonal entry even if it is zero. 2548 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2549 submatrix (same value is used for all local rows). 2550 - o_nnz - array containing the number of nonzeros in the various rows of the 2551 OFF-DIAGONAL portion of the local submatrix (possibly different for 2552 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2553 structure. The size of this array is equal to the number 2554 of local rows, i.e 'm'. 2555 2556 Output Parameter: 2557 . A - the matrix 2558 2559 Notes: 2560 If the *_nnz parameter is given then the *_nz parameter is ignored 2561 2562 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2563 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2564 storage requirements for this matrix. 
2565 2566 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2567 processor then it must be used on all processors that share the object for 2568 that argument. 2569 2570 The user MUST specify either the local or global matrix dimensions 2571 (possibly both). 2572 2573 The parallel matrix is partitioned such that the first m0 rows belong to 2574 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2575 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2576 2577 The DIAGONAL portion of the local submatrix of a processor can be defined 2578 as the submatrix which is obtained by extracting the part corresponding 2579 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2580 first row that belongs to the processor, and r2 is the last row belonging 2581 to this processor. This is a square mxm matrix. The remaining portion 2582 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 2583 2584 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2585 2586 When calling this routine with a single process communicator, a matrix of 2587 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2588 type of communicator, use the construction mechanism: 2589 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 2590 2591 By default, this format uses inodes (identical nodes) when possible. 2592 We search for consecutive rows with the same nonzero structure, thereby 2593 reusing matrix information to achieve increased efficiency. 2594 2595 Options Database Keys: 2596 + -mat_no_inode - Do not use inodes 2597 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 2598 - -mat_aij_oneindex - Internally use indexing starting at 1 2599 rather than 0. Note that when calling MatSetValues(), 2600 the user still MUST index entries starting at 0! 2601 2602 2603 Example usage: 2604 2605 Consider the following 8x8 matrix with 34 non-zero values, that is 2606 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2607 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2608 as follows: 2609 2610 .vb 2611 1 2 0 | 0 3 0 | 0 4 2612 Proc0 0 5 6 | 7 0 0 | 8 0 2613 9 0 10 | 11 0 0 | 12 0 2614 ------------------------------------- 2615 13 0 14 | 15 16 17 | 0 0 2616 Proc1 0 18 0 | 19 20 21 | 0 0 2617 0 0 0 | 22 23 0 | 24 0 2618 ------------------------------------- 2619 Proc2 25 26 27 | 0 0 28 | 29 0 2620 30 0 0 | 31 32 33 | 0 34 2621 .ve 2622 2623 This can be represented as a collection of submatrices as: 2624 2625 .vb 2626 A B C 2627 D E F 2628 G H I 2629 .ve 2630 2631 Where the submatrices A,B,C are owned by proc0, D,E,F are 2632 owned by proc1, G,H,I are owned by proc2. 2633 2634 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2635 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2636 The 'M','N' parameters are 8,8, and have the same values on all procs. 2637 2638 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2639 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2640 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2641 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2642 part as SeqAIJ matrices. e.g., proc1 will store [E] as a SeqAIJ 2643 matrix, and [DF] as another SeqAIJ matrix.
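   For this example, each process could create the matrix as follows (a sketch only;
   m is 3, 3 or 2 depending on the process, and d_nnz/o_nnz hold that process's rows
   of the arrays listed further below):
.vb
      Mat      A;
      PetscInt m,d_nnz[3],o_nnz[3];   /* set from the per-process tables below */
      ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,m,m,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve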
2644 2645 When d_nz, o_nz parameters are specified, d_nz storage elements are 2646 allocated for every row of the local diagonal submatrix, and o_nz 2647 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2648 One way to choose d_nz and o_nz is to use the max nonzeros per local 2649 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2650 In this case, the values of d_nz,o_nz are: 2651 .vb 2652 proc0 : d_nz = 2, o_nz = 2 2653 proc1 : d_nz = 3, o_nz = 2 2654 proc2 : d_nz = 1, o_nz = 4 2655 .ve 2656 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2657 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2658 for proc2. i.e we are using 12+15+10=37 storage locations to store 2659 34 values. 2660 2661 When d_nnz, o_nnz parameters are specified, the storage is specified 2662 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2663 In the above case the values for d_nnz,o_nnz are: 2664 .vb 2665 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2666 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2667 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2668 .ve 2669 Here the space allocated is the sum of all the above values i.e 34, and 2670 hence pre-allocation is perfect. 2671 2672 Level: intermediate 2673 2674 .keywords: matrix, aij, compressed row, sparse, parallel 2675 2676 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2677 MPIAIJ 2678 @*/ 2679 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2680 { 2681 PetscErrorCode ierr; 2682 PetscMPIInt size; 2683 2684 PetscFunctionBegin; 2685 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2686 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2687 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2688 if (size > 1) { 2689 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2690 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2691 } else { 2692 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2693 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2694 } 2695 PetscFunctionReturn(0); 2696 } 2697 2698 #undef __FUNCT__ 2699 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2700 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2701 { 2702 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2703 2704 PetscFunctionBegin; 2705 *Ad = a->A; 2706 *Ao = a->B; 2707 *colmap = a->garray; 2708 PetscFunctionReturn(0); 2709 } 2710 2711 #undef __FUNCT__ 2712 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2713 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2714 { 2715 PetscErrorCode ierr; 2716 PetscInt i; 2717 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2718 2719 PetscFunctionBegin; 2720 if (coloring->ctype == IS_COLORING_LOCAL) { 2721 ISColoringValue *allcolors,*colors; 2722 ISColoring ocoloring; 2723 2724 /* set coloring for diagonal portion */ 2725 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2726 2727 /* set coloring for off-diagonal portion */ 2728 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2729 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2730 for (i=0; i<a->B->cmap.n; i++) { 2731 colors[i] = allcolors[a->garray[i]]; 2732 } 2733 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2734 ierr =
ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 2735 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2736 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2737 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2738 ISColoringValue *colors; 2739 PetscInt *larray; 2740 ISColoring ocoloring; 2741 2742 /* set coloring for diagonal portion */ 2743 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2744 for (i=0; i<a->A->cmap.n; i++) { 2745 larray[i] = i + A->cmap.rstart; 2746 } 2747 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2748 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2749 for (i=0; i<a->A->cmap.n; i++) { 2750 colors[i] = coloring->colors[larray[i]]; 2751 } 2752 ierr = PetscFree(larray);CHKERRQ(ierr); 2753 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 2754 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2755 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2756 2757 /* set coloring for off-diagonal portion */ 2758 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2759 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2760 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2761 for (i=0; i<a->B->cmap.n; i++) { 2762 colors[i] = coloring->colors[larray[i]]; 2763 } 2764 ierr = PetscFree(larray);CHKERRQ(ierr); 2765 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 2766 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2767 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2768 } else { 2769 SETERRQ1(PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype); 2770 } 2771 2772 PetscFunctionReturn(0); 2773 } 2774 2775 #if defined(PETSC_HAVE_ADIC) 2776 #undef __FUNCT__ 2777 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2778 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2779 { 2780 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2781 PetscErrorCode ierr; 2782 2783 PetscFunctionBegin; 2784 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2785 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2786 PetscFunctionReturn(0); 2787 } 2788 #endif 2789 2790 #undef __FUNCT__ 2791 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2792 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 2793 { 2794 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2795 PetscErrorCode ierr; 2796 2797 PetscFunctionBegin; 2798 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2799 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2800 PetscFunctionReturn(0); 2801 } 2802 2803 #undef __FUNCT__ 2804 #define __FUNCT__ "MatMerge" 2805 /*@C 2806 MatMerge - Creates a single large PETSc matrix by concatenating sequential 2807 matrices from each processor 2808 2809 Collective on MPI_Comm 2810 2811 Input Parameters: 2812 + comm - the communicator the parallel matrix will live on 2813 . inmat - the input sequential matrix (one on each process) 2814 . n - number of local columns (or PETSC_DECIDE) 2815 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2816 2817 Output Parameter: 2818 . outmat - the parallel matrix generated 2819 2820 Level: advanced 2821 2822 Notes: The number of columns of the matrix in EACH processor MUST be the same.
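   Example usage (a minimal sketch; error handling abbreviated, names are illustrative):
.vb
      Mat seqA;      /* a MATSEQAIJ matrix assembled independently on each process */
      Mat parA;
      ierr = MatMerge(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&parA);CHKERRQ(ierr);
      /* seqA is destroyed inside MatMerge(); only parA remains */
.ve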
2823 2824 @*/ 2825 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2826 { 2827 PetscErrorCode ierr; 2828 PetscInt m,N,i,rstart,nnz,I,*dnz,*onz; 2829 PetscInt *indx; 2830 PetscScalar *values; 2831 2832 PetscFunctionBegin; 2833 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2834 if (scall == MAT_INITIAL_MATRIX){ 2835 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2836 if (n == PETSC_DECIDE){ 2837 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 2838 } 2839 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2840 rstart -= m; 2841 2842 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2843 for (i=0;i<m;i++) { 2844 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2845 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2846 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2847 } 2848 /* This routine will ONLY return MPIAIJ type matrix */ 2849 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 2850 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2851 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2852 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2853 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2854 2855 } else if (scall == MAT_REUSE_MATRIX){ 2856 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2857 } else { 2858 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 2859 } 2860 2861 for (i=0;i<m;i++) { 2862 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2863 I = i + rstart; 2864 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2865 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2866 } 2867 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2868 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2869 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2870 2871 PetscFunctionReturn(0); 2872 } 2873 2874 #undef __FUNCT__ 2875 #define __FUNCT__ "MatFileSplit" 2876 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2877 { 2878 PetscErrorCode ierr; 2879 PetscMPIInt rank; 2880 PetscInt m,N,i,rstart,nnz; 2881 size_t len; 2882 const PetscInt *indx; 2883 PetscViewer out; 2884 char *name; 2885 Mat B; 2886 const PetscScalar *values; 2887 2888 PetscFunctionBegin; 2889 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2890 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2891 /* Should this be the type of the diagonal block of A? 
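      (As written, a MATSEQAIJ copy of the locally owned rows is always created.)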
*/ 2892 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2893 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 2894 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2895 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2896 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2897 for (i=0;i<m;i++) { 2898 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2899 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2900 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2901 } 2902 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2903 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2904 2905 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2906 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2907 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2908 sprintf(name,"%s.%d",outfile,rank); 2909 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 2910 ierr = PetscFree(name);CHKERRQ(ierr); 2911 ierr = MatView(B,out);CHKERRQ(ierr); 2912 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2913 ierr = MatDestroy(B);CHKERRQ(ierr); 2914 PetscFunctionReturn(0); 2915 } 2916 2917 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 2918 #undef __FUNCT__ 2919 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 2920 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 2921 { 2922 PetscErrorCode ierr; 2923 Mat_Merge_SeqsToMPI *merge; 2924 PetscObjectContainer container; 2925 2926 PetscFunctionBegin; 2927 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2928 if (container) { 2929 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2930 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 2931 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 2932 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 2933 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 2934 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 2935 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 2936 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 2937 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 2938 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 2939 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 2940 ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr); 2941 2942 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 2943 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 2944 /* merge itself is only valid when the container was found, so free it inside this block */ ierr = PetscFree(merge);CHKERRQ(ierr); 2945 } 2946 2947 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 2948 PetscFunctionReturn(0); 2949 } 2950 2951 #include "src/mat/utils/freespace.h" 2952 #include "petscbt.h" 2953 static PetscEvent logkey_seqstompinum = 0; 2954 #undef __FUNCT__ 2955 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 2956 /*@C 2957 MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential 2958 matrices from each processor 2959 2960 Collective on MPI_Comm 2961 2962 Input Parameters: 2963 + comm - the communicator the parallel matrix will live on 2964 . seqmat - the input sequential matrix (one on each process) 2965 . m - number of local rows (or PETSC_DECIDE) 2966 . n - number of local columns (or PETSC_DECIDE) 2967 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2968 2969 Output Parameter: 2970 . mpimat - the parallel matrix generated 2971 2972 Level: advanced 2973 2974 Notes: 2975 The dimensions of the sequential matrix in each processor MUST be the same.
2976 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 2977 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 2978 @*/ 2979 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 2980 { 2981 PetscErrorCode ierr; 2982 MPI_Comm comm=mpimat->comm; 2983 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2984 PetscMPIInt size,rank,taga,*len_s; 2985 PetscInt N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j; 2986 PetscInt proc,m; 2987 PetscInt **buf_ri,**buf_rj; 2988 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 2989 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 2990 MPI_Request *s_waits,*r_waits; 2991 MPI_Status *status; 2992 MatScalar *aa=a->a,**abuf_r,*ba_i; 2993 Mat_Merge_SeqsToMPI *merge; 2994 PetscObjectContainer container; 2995 2996 PetscFunctionBegin; 2997 if (!logkey_seqstompinum) { 2998 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 2999 } 3000 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3001 3002 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3003 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3004 3005 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3006 if (container) { 3007 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3008 } 3009 bi = merge->bi; 3010 bj = merge->bj; 3011 buf_ri = merge->buf_ri; 3012 buf_rj = merge->buf_rj; 3013 3014 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3015 owners = merge->rowmap.range; 3016 len_s = merge->len_s; 3017 3018 /* send and recv matrix values */ 3019 /*-----------------------------*/ 3020 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3021 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3022 3023 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 3024 for (proc=0,k=0; proc<size; proc++){ 3025 if (!len_s[proc]) continue; 3026 i = owners[proc]; 3027 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 3028 k++; 3029 } 3030 3031 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 3032 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 3033 ierr = PetscFree(status);CHKERRQ(ierr); 3034 3035 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3036 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3037 3038 /* insert mat values of mpimat */ 3039 /*----------------------------*/ 3040 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 3041 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3042 nextrow = buf_ri_k + merge->nrecv; 3043 nextai = nextrow + merge->nrecv; 3044 3045 for (k=0; k<merge->nrecv; k++){ 3046 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3047 nrows = *(buf_ri_k[k]); 3048 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 3049 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3050 } 3051 3052 /* set values of ba */ 3053 m = merge->rowmap.n; 3054 for (i=0; i<m; i++) { 3055 arow = owners[rank] + i; 3056 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 3057 bnzi = bi[i+1] - bi[i]; 3058 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 3059 3060 /* add local non-zero vals of this proc's seqmat into ba */ 3061 anzi = 
ai[arow+1] - ai[arow]; 3062 aj = a->j + ai[arow]; 3063 aa = a->a + ai[arow]; 3064 nextaj = 0; 3065 for (j=0; nextaj<anzi; j++){ 3066 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3067 ba_i[j] += aa[nextaj++]; 3068 } 3069 } 3070 3071 /* add received vals into ba */ 3072 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3073 /* i-th row */ 3074 if (i == *nextrow[k]) { 3075 anzi = *(nextai[k]+1) - *nextai[k]; 3076 aj = buf_rj[k] + *(nextai[k]); 3077 aa = abuf_r[k] + *(nextai[k]); 3078 nextaj = 0; 3079 for (j=0; nextaj<anzi; j++){ 3080 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3081 ba_i[j] += aa[nextaj++]; 3082 } 3083 } 3084 nextrow[k]++; nextai[k]++; 3085 } 3086 } 3087 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3088 } 3089 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3090 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3091 3092 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3093 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3094 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3095 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3096 PetscFunctionReturn(0); 3097 } 3098 3099 static PetscEvent logkey_seqstompisym = 0; 3100 #undef __FUNCT__ 3101 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 3102 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3103 { 3104 PetscErrorCode ierr; 3105 Mat B_mpi; 3106 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3107 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3108 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3109 PetscInt M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j; 3110 PetscInt len,proc,*dnz,*onz; 3111 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3112 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3113 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3114 MPI_Status *status; 3115 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3116 PetscBT lnkbt; 3117 Mat_Merge_SeqsToMPI *merge; 3118 PetscObjectContainer container; 3119 3120 PetscFunctionBegin; 3121 if (!logkey_seqstompisym) { 3122 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3123 } 3124 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3125 3126 /* make sure it is a PETSc comm */ 3127 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 3128 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3129 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3130 3131 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3132 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3133 3134 /* determine row ownership */ 3135 /*---------------------------------------------------------*/ 3136 merge->rowmap.n = m; 3137 merge->rowmap.N = M; 3138 merge->rowmap.bs = 1; 3139 ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 3140 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3141 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3142 3143 m = merge->rowmap.n; 3144 M = merge->rowmap.N; 3145 owners = merge->rowmap.range; 3146 3147 /* determine the number of messages to send, their lengths */ 3148 /*---------------------------------------------------------*/ 3149 len_s = merge->len_s; 3150 3151 len = 0; /* length of buf_si[] */ 3152 merge->nsend = 0; 3153 for (proc=0; proc<size; proc++){ 3154 len_si[proc] = 0; 3155 if (proc == rank){ 3156 
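      /* nothing is sent to ourselves: the locally owned rows of seqmat are merged directly in the loops below */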
len_s[proc] = 0; 3157 } else { 3158 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3159 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3160 } 3161 if (len_s[proc]) { 3162 merge->nsend++; 3163 nrows = 0; 3164 for (i=owners[proc]; i<owners[proc+1]; i++){ 3165 if (ai[i+1] > ai[i]) nrows++; 3166 } 3167 len_si[proc] = 2*(nrows+1); 3168 len += len_si[proc]; 3169 } 3170 } 3171 3172 /* determine the number and length of messages to receive for ij-structure */ 3173 /*-------------------------------------------------------------------------*/ 3174 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3175 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3176 3177 /* post the Irecv of j-structure */ 3178 /*-------------------------------*/ 3179 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 3180 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3181 3182 /* post the Isend of j-structure */ 3183 /*--------------------------------*/ 3184 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3185 sj_waits = si_waits + merge->nsend; 3186 3187 for (proc=0, k=0; proc<size; proc++){ 3188 if (!len_s[proc]) continue; 3189 i = owners[proc]; 3190 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3191 k++; 3192 } 3193 3194 /* receives and sends of j-structure are complete */ 3195 /*------------------------------------------------*/ 3196 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 3197 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 3198 3199 /* send and recv i-structure */ 3200 /*---------------------------*/ 3201 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 3202 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3203 3204 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3205 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3206 for (proc=0,k=0; proc<size; proc++){ 3207 if (!len_s[proc]) continue; 3208 /* form outgoing message for i-structure: 3209 buf_si[0]: nrows to be sent 3210 [1:nrows]: row index (global) 3211 [nrows+1:2*nrows+1]: i-structure index 3212 */ 3213 /*-------------------------------------------*/ 3214 nrows = len_si[proc]/2 - 1; 3215 buf_si_i = buf_si + nrows+1; 3216 buf_si[0] = nrows; 3217 buf_si_i[0] = 0; 3218 nrows = 0; 3219 for (i=owners[proc]; i<owners[proc+1]; i++){ 3220 anzi = ai[i+1] - ai[i]; 3221 if (anzi) { 3222 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3223 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3224 nrows++; 3225 } 3226 } 3227 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3228 k++; 3229 buf_si += len_si[proc]; 3230 } 3231 3232 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 3233 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 3234 3235 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 3236 for (i=0; i<merge->nrecv; i++){ 3237 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 3238 } 3239 3240 ierr = PetscFree(len_si);CHKERRQ(ierr); 3241 ierr = 
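/* the i- and j-structure exchange is complete, so the remaining length arrays, request arrays and the send buffer can be released */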
PetscFree(len_ri);CHKERRQ(ierr); 3242 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 3243 ierr = PetscFree(si_waits);CHKERRQ(ierr); 3244 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3245 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3246 ierr = PetscFree(status);CHKERRQ(ierr); 3247 3248 /* compute a local seq matrix in each processor */ 3249 /*----------------------------------------------*/ 3250 /* allocate bi array and free space for accumulating nonzero column info */ 3251 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3252 bi[0] = 0; 3253 3254 /* create and initialize a linked list */ 3255 nlnk = N+1; 3256 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3257 3258 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3259 len = 0; 3260 len = ai[owners[rank+1]] - ai[owners[rank]]; 3261 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3262 current_space = free_space; 3263 3264 /* determine symbolic info for each local row */ 3265 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3266 nextrow = buf_ri_k + merge->nrecv; 3267 nextai = nextrow + merge->nrecv; 3268 for (k=0; k<merge->nrecv; k++){ 3269 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */ 3270 nrows = *buf_ri_k[k]; 3271 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */ 3272 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the i-structure (row offsets) of the k-th received message */ 3273 } 3274 3275 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3276 len = 0; 3277 for (i=0;i<m;i++) { 3278 bnzi = 0; 3279 /* add local non-zero cols of this proc's seqmat into lnk */ 3280 arow = owners[rank] + i; 3281 anzi = ai[arow+1] - ai[arow]; 3282 aj = a->j + ai[arow]; 3283 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3284 bnzi += nlnk; 3285 /* add received col data into lnk */ 3286 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3287 if (i == *nextrow[k]) { /* i-th row */ 3288 anzi = *(nextai[k]+1) - *nextai[k]; 3289 aj = buf_rj[k] + *nextai[k]; 3290 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3291 bnzi += nlnk; 3292 nextrow[k]++; nextai[k]++; 3293 } 3294 } 3295 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3296 3297 /* if free space is not available, make more free space */ 3298 if (current_space->local_remaining<bnzi) { 3299 ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr); 3300 nspacedouble++; 3301 } 3302 /* copy data into free space, then initialize lnk */ 3303 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3304 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3305 3306 current_space->array += bnzi; 3307 current_space->local_used += bnzi; 3308 current_space->local_remaining -= bnzi; 3309 3310 bi[i+1] = bi[i] + bnzi; 3311 } 3312 3313 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3314 3315 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3316 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3317 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3318 3319 /* create symbolic parallel matrix B_mpi */ 3320 /*---------------------------------------*/ 3321 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 3322 if (n==PETSC_DECIDE) { 3323 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 3324 } else { 3325 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3326 } 3327 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3328 ierr =
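/* dnz/onz were accumulated by MatPreallocateSet() above: for each local row of B_mpi they hold the number of merged nonzeros whose columns fall inside, respectively outside, this process's column ownership range, which is exactly the per-row information MatMPIAIJSetPreallocation() expects; the numeric phase can then insert values without triggering any further allocation. MatPreallocateFinalize() releases the two work arrays. */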
MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3329 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3330 3331 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3332 B_mpi->assembled = PETSC_FALSE; 3333 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3334 merge->bi = bi; 3335 merge->bj = bj; 3336 merge->buf_ri = buf_ri; 3337 merge->buf_rj = buf_rj; 3338 merge->coi = PETSC_NULL; 3339 merge->coj = PETSC_NULL; 3340 merge->owners_co = PETSC_NULL; 3341 3342 /* attach the supporting struct to B_mpi for reuse */ 3343 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3344 ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr); 3345 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3346 *mpimat = B_mpi; 3347 3348 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 3349 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3350 PetscFunctionReturn(0); 3351 } 3352 3353 static PetscEvent logkey_seqstompi = 0; 3354 #undef __FUNCT__ 3355 #define __FUNCT__ "MatMerge_SeqsToMPI" 3356 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3357 { 3358 PetscErrorCode ierr; 3359 3360 PetscFunctionBegin; 3361 if (!logkey_seqstompi) { 3362 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3363 } 3364 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3365 if (scall == MAT_INITIAL_MATRIX){ 3366 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3367 } 3368 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3369 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3370 PetscFunctionReturn(0); 3371 } 3372 static PetscEvent logkey_getlocalmat = 0; 3373 #undef __FUNCT__ 3374 #define __FUNCT__ "MatGetLocalMat" 3375 /*@C 3376 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3377 3378 Not Collective 3379 3380 Input Parameters: 3381 + A - the matrix 3382 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3383 3384 Output Parameter: 3385 . 
A_loc - the local sequential matrix generated 3386 3387 Level: developer 3388 3389 @*/ 3390 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3391 { 3392 PetscErrorCode ierr; 3393 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3394 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3395 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3396 PetscScalar *aa=a->a,*ba=b->a,*ca; 3397 PetscInt am=A->rmap.n,i,j,k,cstart=A->cmap.rstart; 3398 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3399 3400 PetscFunctionBegin; 3401 if (!logkey_getlocalmat) { 3402 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3403 } 3404 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3405 if (scall == MAT_INITIAL_MATRIX){ 3406 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 3407 ci[0] = 0; 3408 for (i=0; i<am; i++){ 3409 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3410 } 3411 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3412 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 3413 k = 0; 3414 for (i=0; i<am; i++) { 3415 ncols_o = bi[i+1] - bi[i]; 3416 ncols_d = ai[i+1] - ai[i]; 3417 /* off-diagonal portion of A */ 3418 for (jo=0; jo<ncols_o; jo++) { 3419 col = cmap[*bj]; 3420 if (col >= cstart) break; 3421 cj[k] = col; bj++; 3422 ca[k++] = *ba++; 3423 } 3424 /* diagonal portion of A */ 3425 for (j=0; j<ncols_d; j++) { 3426 cj[k] = cstart + *aj++; 3427 ca[k++] = *aa++; 3428 } 3429 /* off-diagonal portion of A */ 3430 for (j=jo; j<ncols_o; j++) { 3431 cj[k] = cmap[*bj++]; 3432 ca[k++] = *ba++; 3433 } 3434 } 3435 /* put together the new matrix */ 3436 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr); 3437 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3438 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3439 mat = (Mat_SeqAIJ*)(*A_loc)->data; 3440 mat->freedata = PETSC_TRUE; 3441 mat->nonew = 0; 3442 } else if (scall == MAT_REUSE_MATRIX){ 3443 mat=(Mat_SeqAIJ*)(*A_loc)->data; 3444 ci = mat->i; cj = mat->j; ca = mat->a; 3445 for (i=0; i<am; i++) { 3446 /* off-diagonal portion of A */ 3447 ncols_o = bi[i+1] - bi[i]; 3448 for (jo=0; jo<ncols_o; jo++) { 3449 col = cmap[*bj]; 3450 if (col >= cstart) break; 3451 *ca++ = *ba++; bj++; 3452 } 3453 /* diagonal portion of A */ 3454 ncols_d = ai[i+1] - ai[i]; 3455 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 3456 /* off-diagonal portion of A */ 3457 for (j=jo; j<ncols_o; j++) { 3458 *ca++ = *ba++; bj++; 3459 } 3460 } 3461 } else { 3462 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3463 } 3464 3465 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3466 PetscFunctionReturn(0); 3467 } 3468 3469 static PetscEvent logkey_getlocalmatcondensed = 0; 3470 #undef __FUNCT__ 3471 #define __FUNCT__ "MatGetLocalMatCondensed" 3472 /*@C 3473 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 3474 3475 Not Collective 3476 3477 Input Parameters: 3478 + A - the matrix 3479 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3480 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 3481 3482 Output Parameter: 3483 . 
A_loc - the local sequential matrix generated 3484 3485 Level: developer 3486 3487 @*/ 3488 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 3489 { 3490 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3491 PetscErrorCode ierr; 3492 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 3493 IS isrowa,iscola; 3494 Mat *aloc; 3495 3496 PetscFunctionBegin; 3497 if (!logkey_getlocalmatcondensed) { 3498 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE); 3499 } 3500 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3501 if (!row){ 3502 start = A->rmap.rstart; end = A->rmap.rend; 3503 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 3504 } else { 3505 isrowa = *row; 3506 } 3507 if (!col){ 3508 start = A->cmap.rstart; 3509 cmap = a->garray; 3510 nzA = a->A->cmap.n; 3511 nzB = a->B->cmap.n; 3512 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3513 ncols = 0; 3514 for (i=0; i<nzB; i++) { 3515 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3516 else break; 3517 } 3518 imark = i; 3519 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 3520 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 3521 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 3522 ierr = PetscFree(idx);CHKERRQ(ierr); 3523 } else { 3524 iscola = *col; 3525 } 3526 if (scall != MAT_INITIAL_MATRIX){ 3527 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 3528 aloc[0] = *A_loc; 3529 } 3530 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 3531 *A_loc = aloc[0]; 3532 ierr = PetscFree(aloc);CHKERRQ(ierr); 3533 if (!row){ 3534 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 3535 } 3536 if (!col){ 3537 ierr = ISDestroy(iscola);CHKERRQ(ierr); 3538 } 3539 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3540 PetscFunctionReturn(0); 3541 } 3542 3543 static PetscEvent logkey_GetBrowsOfAcols = 0; 3544 #undef __FUNCT__ 3545 #define __FUNCT__ "MatGetBrowsOfAcols" 3546 /*@C 3547 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 3548 3549 Collective on Mat 3550 3551 Input Parameters: 3552 + A,B - the matrices in mpiaij format 3553 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3554 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 3555 3556 Output Parameter: 3557 + rowb, colb - index sets of rows and columns of B to extract 3558 . 
brstart - row index of B_seq from which next B->rmap.n rows are taken from B's local rows 3559 - B_seq - the sequential matrix generated 3560 3561 Level: developer 3562 3563 @*/ 3564 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 3565 { 3566 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3567 PetscErrorCode ierr; 3568 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 3569 IS isrowb,iscolb; 3570 Mat *bseq; 3571 3572 PetscFunctionBegin; 3573 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 3574 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 3575 } 3576 if (!logkey_GetBrowsOfAcols) { 3577 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 3578 } 3579 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3580 3581 if (scall == MAT_INITIAL_MATRIX){ 3582 start = A->cmap.rstart; 3583 cmap = a->garray; 3584 nzA = a->A->cmap.n; 3585 nzB = a->B->cmap.n; 3586 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3587 ncols = 0; 3588 for (i=0; i<nzB; i++) { /* row < local row index */ 3589 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3590 else break; 3591 } 3592 imark = i; 3593 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 3594 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 3595 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 3596 ierr = PetscFree(idx);CHKERRQ(ierr); 3597 *brstart = imark; 3598 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr); 3599 } else { 3600 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 3601 isrowb = *rowb; iscolb = *colb; 3602 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 3603 bseq[0] = *B_seq; 3604 } 3605 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 3606 *B_seq = bseq[0]; 3607 ierr = PetscFree(bseq);CHKERRQ(ierr); 3608 if (!rowb){ 3609 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 3610 } else { 3611 *rowb = isrowb; 3612 } 3613 if (!colb){ 3614 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 3615 } else { 3616 *colb = iscolb; 3617 } 3618 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3619 PetscFunctionReturn(0); 3620 } 3621 3622 static PetscEvent logkey_GetBrowsOfAocols = 0; 3623 #undef __FUNCT__ 3624 #define __FUNCT__ "MatGetBrowsOfAoCols" 3625 /*@C 3626 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 3627 of the OFF-DIAGONAL portion of local A 3628 3629 Collective on Mat 3630 3631 Input Parameters: 3632 + A,B - the matrices in mpiaij format 3633 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3634 . 
startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 3635 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 3636 3637 Output Parameter: 3638 + B_oth - the sequential matrix generated 3639 3640 Level: developer 3641 3642 @*/ 3643 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 3644 { 3645 VecScatter_MPI_General *gen_to,*gen_from; 3646 PetscErrorCode ierr; 3647 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3648 Mat_SeqAIJ *b_oth; 3649 VecScatter ctx=a->Mvctx; 3650 MPI_Comm comm=ctx->comm; 3651 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank; 3652 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj; 3653 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 3654 PetscInt i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 3655 MPI_Request *rwaits,*swaits; 3656 MPI_Status *sstatus,rstatus; 3657 PetscInt *cols; 3658 PetscScalar *vals; 3659 PetscMPIInt j; 3660 3661 PetscFunctionBegin; 3662 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 3663 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 3664 } 3665 if (!logkey_GetBrowsOfAocols) { 3666 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE); 3667 } 3668 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3669 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3670 3671 gen_to = (VecScatter_MPI_General*)ctx->todata; 3672 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 3673 rvalues = gen_from->values; /* holds the length of sending row */ 3674 svalues = gen_to->values; /* holds the length of receiving row */ 3675 nrecvs = gen_from->n; 3676 nsends = gen_to->n; 3677 rwaits = gen_from->requests; 3678 swaits = gen_to->requests; 3679 srow = gen_to->indices; /* local row index to be sent */ 3680 rstarts = gen_from->starts; 3681 sstarts = gen_to->starts; 3682 rprocs = gen_from->procs; 3683 sprocs = gen_to->procs; 3684 sstatus = gen_to->sstatus; 3685 3686 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 3687 if (scall == MAT_INITIAL_MATRIX){ 3688 /* i-array */ 3689 /*---------*/ 3690 /* post receives */ 3691 for (i=0; i<nrecvs; i++){ 3692 rowlen = (PetscInt*)rvalues + rstarts[i]; 3693 nrows = rstarts[i+1]-rstarts[i]; 3694 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3695 } 3696 3697 /* pack the outgoing message */ 3698 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 3699 rstartsj = sstartsj + nsends +1; 3700 sstartsj[0] = 0; rstartsj[0] = 0; 3701 len = 0; /* total length of j or a array to be sent */ 3702 k = 0; 3703 for (i=0; i<nsends; i++){ 3704 rowlen = (PetscInt*)svalues + sstarts[i]; 3705 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3706 for (j=0; j<nrows; j++) { 3707 row = srow[k] + B->rmap.range[rank]; /* global row idx */ 3708 ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 3709 len += rowlen[j]; 3710 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 3711 k++; 3712 } 3713 ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3714 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 3715 } 3716 /* recvs and sends of i-array are completed */ 3717 i = nrecvs; 3718 while 
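/* drain all outstanding row-length receives; MPI_Waitany() returns them in whatever order they complete, and the Waitall below finishes the matching sends */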
(i--) { 3719 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3720 } 3721 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3722 /* allocate buffers for sending j and a arrays */ 3723 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 3724 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 3725 3726 /* create i-array of B_oth */ 3727 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 3728 b_othi[0] = 0; 3729 len = 0; /* total length of j or a array to be received */ 3730 k = 0; 3731 for (i=0; i<nrecvs; i++){ 3732 rowlen = (PetscInt*)rvalues + rstarts[i]; 3733 nrows = rstarts[i+1]-rstarts[i]; 3734 for (j=0; j<nrows; j++) { 3735 b_othi[k+1] = b_othi[k] + rowlen[j]; 3736 len += rowlen[j]; k++; 3737 } 3738 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 3739 } 3740 3741 /* allocate space for j and a arrays of B_oth */ 3742 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 3743 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr); 3744 3745 /* j-array */ 3746 /*---------*/ 3747 /* post receives of j-array */ 3748 for (i=0; i<nrecvs; i++){ 3749 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3750 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3751 } 3752 k = 0; 3753 for (i=0; i<nsends; i++){ 3754 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3755 bufJ = bufj+sstartsj[i]; 3756 for (j=0; j<nrows; j++) { 3757 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 3758 ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3759 for (l=0; l<ncols; l++){ 3760 *bufJ++ = cols[l]; 3761 } 3762 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3763 } 3764 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3765 } 3766 3767 /* recvs and sends of j-array are completed */ 3768 i = nrecvs; 3769 while (i--) { 3770 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3771 } 3772 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3773 } else if (scall == MAT_REUSE_MATRIX){ 3774 sstartsj = *startsj; 3775 rstartsj = sstartsj + nsends +1; 3776 bufa = *bufa_ptr; 3777 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 3778 b_otha = b_oth->a; 3779 } else { 3780 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3781 } 3782 3783 /* a-array */ 3784 /*---------*/ 3785 /* post receives of a-array */ 3786 for (i=0; i<nrecvs; i++){ 3787 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3788 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3789 } 3790 k = 0; 3791 for (i=0; i<nsends; i++){ 3792 nrows = sstarts[i+1]-sstarts[i]; 3793 bufA = bufa+sstartsj[i]; 3794 for (j=0; j<nrows; j++) { 3795 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 3796 ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3797 for (l=0; l<ncols; l++){ 3798 *bufA++ = vals[l]; 3799 } 3800 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3801 3802 } 3803 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3804 } 3805 /* recvs and sends of a-array are completed */ 3806 i = nrecvs; 3807 while (i--) { 3808 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3809 } 3810 if (nsends)
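/* the value (a-array) sends must complete before bufa can safely be reused on a later MAT_REUSE_MATRIX call or handed back to the caller through bufa_ptr */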
{ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3811 3812 if (scall == MAT_INITIAL_MATRIX){ 3813 /* put together the new matrix */ 3814 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 3815 3816 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3817 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3818 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 3819 b_oth->freedata = PETSC_TRUE; 3820 b_oth->nonew = 0; 3821 3822 ierr = PetscFree(bufj);CHKERRQ(ierr); 3823 if (!startsj || !bufa_ptr){ 3824 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 3825 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 3826 } else { 3827 *startsj = sstartsj; 3828 *bufa_ptr = bufa; 3829 } 3830 } 3831 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3832 3833 PetscFunctionReturn(0); 3834 } 3835 3836 #undef __FUNCT__ 3837 #define __FUNCT__ "MatGetCommunicationStructs" 3838 /*@C 3839 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 3840 3841 Not Collective 3842 3843 Input Parameters: 3844 . A - The matrix in mpiaij format 3845 3846 Output Parameter: 3847 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 3848 . colmap - A map from global column index to local index into lvec 3849 - multScatter - A scatter from the argument of a matrix-vector product to lvec 3850 3851 Level: developer 3852 3853 @*/ 3854 #if defined (PETSC_USE_CTABLE) 3855 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 3856 #else 3857 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 3858 #endif 3859 { 3860 Mat_MPIAIJ *a; 3861 3862 PetscFunctionBegin; 3863 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 3864 PetscValidPointer(lvec, 2) 3865 PetscValidPointer(colmap, 3) 3866 PetscValidPointer(multScatter, 4) 3867 a = (Mat_MPIAIJ *) A->data; 3868 if (lvec) *lvec = a->lvec; 3869 if (colmap) *colmap = a->colmap; 3870 if (multScatter) *multScatter = a->Mvctx; 3871 PetscFunctionReturn(0); 3872 } 3873 3874 /*MC 3875 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 3876 3877 Options Database Keys: 3878 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 3879 3880 Level: beginner 3881 3882 .seealso: MatCreateMPIAIJ 3883 M*/ 3884 3885 EXTERN_C_BEGIN 3886 #undef __FUNCT__ 3887 #define __FUNCT__ "MatCreate_MPIAIJ" 3888 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 3889 { 3890 Mat_MPIAIJ *b; 3891 PetscErrorCode ierr; 3892 PetscMPIInt size; 3893 3894 PetscFunctionBegin; 3895 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 3896 3897 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 3898 B->data = (void*)b; 3899 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3900 B->factor = 0; 3901 B->rmap.bs = 1; 3902 B->assembled = PETSC_FALSE; 3903 B->mapping = 0; 3904 3905 B->insertmode = NOT_SET_VALUES; 3906 b->size = size; 3907 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 3908 3909 /* build cache for off array entries formed */ 3910 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 3911 b->donotstash = PETSC_FALSE; 3912 b->colmap = 0; 3913 b->garray = 0; 3914 b->roworiented = PETSC_TRUE; 3915 3916 /* stuff used for matrix vector multiply */ 3917 b->lvec = PETSC_NULL; 3918 b->Mvctx = PETSC_NULL; 3919 3920 /* stuff for MatGetRow() */ 3921 b->rowindices = 0; 3922 b->rowvalues = 0; 3923 b->getrowactive = PETSC_FALSE; 3924 3925 3926 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3927 "MatStoreValues_MPIAIJ", 3928 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 3929 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3930 "MatRetrieveValues_MPIAIJ", 3931 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 3932 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3933 "MatGetDiagonalBlock_MPIAIJ", 3934 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 3935 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3936 "MatIsTranspose_MPIAIJ", 3937 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 3938 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 3939 "MatMPIAIJSetPreallocation_MPIAIJ", 3940 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 3941 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 3942 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 3943 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 3944 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3945 "MatDiagonalScaleLocal_MPIAIJ", 3946 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 3947 PetscFunctionReturn(0); 3948 } 3949 EXTERN_C_END 3950 3951 /* 3952 Special version for direct calls from Fortran 3953 */ 3954 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3955 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 3956 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3957 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 3958 #endif 3959 3960 /* Change these macros so can be used in void function */ 3961 #undef CHKERRQ 3962 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr) 3963 #undef SETERRQ2 3964 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr) 3965 #undef SETERRQ 3966 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr) 3967 3968 EXTERN_C_BEGIN 3969 #undef __FUNCT__ 3970 #define __FUNCT__ "matsetvaluesmpiaij_" 3971 void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv) 3972 { 3973 Mat mat = *mmat; 3974 PetscInt m = *mm, n = *mn; 3975 InsertMode addv = *maddv; 3976 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3977 
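/* This routine duplicates the body of MatSetValues_MPIAIJ() but returns void so Fortran can call it directly, without an interface stub. Since a void function cannot return a PetscErrorCode, CHKERRQ/SETERRQ were redefined above to CHKERRABORT(mat->comm,ierr): any failure is reported and the program aborted instead of an error code being propagated. */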
PetscScalar value; 3978 PetscErrorCode ierr; 3979 3980 MatPreallocated(mat); 3981 if (mat->insertmode == NOT_SET_VALUES) { 3982 mat->insertmode = addv; 3983 } 3984 #if defined(PETSC_USE_DEBUG) 3985 else if (mat->insertmode != addv) { 3986 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3987 } 3988 #endif 3989 { 3990 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 3991 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 3992 PetscTruth roworiented = aij->roworiented; 3993 3994 /* Some Variables required in the macro */ 3995 Mat A = aij->A; 3996 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3997 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 3998 PetscScalar *aa = a->a; 3999 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 4000 Mat B = aij->B; 4001 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 4002 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n; 4003 PetscScalar *ba = b->a; 4004 4005 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 4006 PetscInt nonew = a->nonew; 4007 PetscScalar *ap1,*ap2; 4008 4009 PetscFunctionBegin; 4010 for (i=0; i<m; i++) { 4011 if (im[i] < 0) continue; 4012 #if defined(PETSC_USE_DEBUG) 4013 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 4014 #endif 4015 if (im[i] >= rstart && im[i] < rend) { 4016 row = im[i] - rstart; 4017 lastcol1 = -1; 4018 rp1 = aj + ai[row]; 4019 ap1 = aa + ai[row]; 4020 rmax1 = aimax[row]; 4021 nrow1 = ailen[row]; 4022 low1 = 0; 4023 high1 = nrow1; 4024 lastcol2 = -1; 4025 rp2 = bj + bi[row]; 4026 ap2 = ba + bi[row]; 4027 rmax2 = bimax[row]; 4028 nrow2 = bilen[row]; 4029 low2 = 0; 4030 high2 = nrow2; 4031 4032 for (j=0; j<n; j++) { 4033 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 4034 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 4035 if (in[j] >= cstart && in[j] < cend){ 4036 col = in[j] - cstart; 4037 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 4038 } else if (in[j] < 0) continue; 4039 #if defined(PETSC_USE_DEBUG) 4040 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);} 4041 #endif 4042 else { 4043 if (mat->was_assembled) { 4044 if (!aij->colmap) { 4045 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 4046 } 4047 #if defined (PETSC_USE_CTABLE) 4048 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4049 col--; 4050 #else 4051 col = aij->colmap[in[j]] - 1; 4052 #endif 4053 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 4054 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 4055 col = in[j]; 4056 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 4057 B = aij->B; 4058 b = (Mat_SeqAIJ*)B->data; 4059 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 4060 rp2 = bj + bi[row]; 4061 ap2 = ba + bi[row]; 4062 rmax2 = bimax[row]; 4063 nrow2 = bilen[row]; 4064 low2 = 0; 4065 high2 = nrow2; 4066 bm = aij->B->rmap.n; 4067 ba = b->a; 4068 } 4069 } else col = in[j]; 4070 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 4071 } 4072 } 4073 } else { 4074 if (!aij->donotstash) { 4075 if (roworiented) { 4076 if (ignorezeroentries && v[i*n] == 0.0) continue; 4077 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 4078 } else { 4079 if (ignorezeroentries && v[i] == 0.0) continue; 4080 ierr = 
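/* column-oriented input: stash the i-th slice of v (stride m) for this off-process row; stashed entries are communicated during MatAssemblyBegin()/MatAssemblyEnd() */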
MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 4081 } 4082 } 4083 } 4084 }} 4085 PetscFunctionReturnVoid(); 4086 } 4087 EXTERN_C_END 4088
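/*
   Example (an illustrative sketch only, not an excerpt from the PETSc documentation):
   merging per-process SeqAIJ contributions into one parallel MPIAIJ matrix with the
   routines above.  Each process is assumed to hold its contributions in a SeqAIJ
   matrix seq whose dimensions are the GLOBAL sizes M x N; rows owned by other
   processes simply contain the entries this process wants added there, and
   overlapping entries are summed during the merge.  comm is the communicator the
   merged matrix should live on; seq, C, m and n are the caller's variables.

      Mat            seq,C;
      PetscErrorCode ierr;
      ...  assemble seq locally, using global row and column indices ...
      ierr = MatMerge_SeqsToMPI(comm,seq,m,n,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
      ...  use C; if the nonzero pattern of seq is unchanged, later merges may reuse C:
      ierr = MatMerge_SeqsToMPI(comm,seq,m,n,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
      ierr = MatDestroy(C);CHKERRQ(ierr);

   Here m and n are the local row and column sizes of C (the symbolic routine above
   explicitly handles n == PETSC_DECIDE); as noted in the MatMerge_SeqsToMPINumeric()
   comment, seq is kept in the container attached to C and is destroyed when C is
   destroyed, so the caller does not destroy it separately.
*/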