1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 4 #include "src/inline/spops.h" 5 6 /* 7 Local utility routine that creates a mapping from the global column 8 number to the local number in the off-diagonal part of the local 9 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 10 a slightly higher hash table cost; without it it is not scalable (each processor 11 has an order N integer array but is fast to acess. 12 */ 13 #undef __FUNCT__ 14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 16 { 17 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 18 PetscErrorCode ierr; 19 PetscInt n = aij->B->cmap.n,i; 20 21 PetscFunctionBegin; 22 #if defined (PETSC_USE_CTABLE) 23 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 24 for (i=0; i<n; i++){ 25 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 26 } 27 #else 28 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 29 ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr); 30 ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr); 31 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 32 #endif 33 PetscFunctionReturn(0); 34 } 35 36 37 #define CHUNKSIZE 15 38 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 39 { \ 40 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 41 lastcol1 = col;\ 42 while (high1-low1 > 5) { \ 43 t = (low1+high1)/2; \ 44 if (rp1[t] > col) high1 = t; \ 45 else low1 = t; \ 46 } \ 47 for (_i=low1; _i<high1; _i++) { \ 48 if (rp1[_i] > col) break; \ 49 if (rp1[_i] == col) { \ 50 if (addv == ADD_VALUES) ap1[_i] += value; \ 51 else ap1[_i] = value; \ 52 goto a_noinsert; \ 53 } \ 54 } \ 55 if (value == 0.0 && ignorezeroentries) goto a_noinsert; \ 56 if (nonew == 1) goto a_noinsert; \ 57 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 58 MatSeqXAIJReallocateAIJ(a,1,nrow1,row,col,rmax1,aa,ai,aj,am,rp1,ap1,aimax,nonew); \ 59 N = nrow1++ - 1; a->nz++; high1++; \ 60 /* shift up all the later entries in this row */ \ 61 for (ii=N; ii>=_i; ii--) { \ 62 rp1[ii+1] = rp1[ii]; \ 63 ap1[ii+1] = ap1[ii]; \ 64 } \ 65 rp1[_i] = col; \ 66 ap1[_i] = value; \ 67 a_noinsert: ; \ 68 ailen[row] = nrow1; \ 69 } 70 71 72 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 73 { \ 74 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 75 lastcol2 = col;\ 76 while (high2-low2 > 5) { \ 77 t = (low2+high2)/2; \ 78 if (rp2[t] > col) high2 = t; \ 79 else low2 = t; \ 80 } \ 81 for (_i=low2; _i<high2; _i++) { \ 82 if (rp2[_i] > col) break; \ 83 if (rp2[_i] == col) { \ 84 if (addv == ADD_VALUES) ap2[_i] += value; \ 85 else ap2[_i] = value; \ 86 goto b_noinsert; \ 87 } \ 88 } \ 89 if (value == 0.0 && ignorezeroentries) goto b_noinsert; \ 90 if (nonew == 1) goto b_noinsert; \ 91 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 92 MatSeqXAIJReallocateAIJ(b,1,nrow2,row,col,rmax2,ba,bi,bj,bm,rp2,ap2,bimax,nonew); \ 93 N = nrow2++ - 1; b->nz++; high2++;\ 94 /* shift up all the later entries in this row */ \ 95 for (ii=N; ii>=_i; ii--) { \ 96 rp2[ii+1] = rp2[ii]; \ 97 ap2[ii+1] = ap2[ii]; \ 98 } \ 99 rp2[_i] = col; \ 100 ap2[_i] = value; \ 101 b_noinsert: ; \ 102 bilen[row] = nrow2; \ 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 107 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 108 { 109 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 110 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 111 PetscErrorCode ierr; 112 PetscInt l,*garray = mat->garray,diag; 113 114 PetscFunctionBegin; 115 /* code only works for square matrices A */ 116 117 /* find size of row to the left of the diagonal part */ 118 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 119 row = row - diag; 120 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 121 if (garray[b->j[b->i[row]+l]] > diag) break; 122 } 123 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 124 125 /* diagonal part */ 126 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 127 128 /* right of diagonal part */ 129 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 130 PetscFunctionReturn(0); 131 } 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "MatSetValues_MPIAIJ" 135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 136 { 137 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 138 PetscScalar value; 139 PetscErrorCode ierr; 140 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 141 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 142 PetscTruth roworiented = aij->roworiented; 143 144 /* Some Variables required in the macro */ 145 Mat A = aij->A; 146 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 147 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 148 PetscScalar *aa = a->a; 149 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 150 Mat B = aij->B; 151 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 152 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n; 153 PetscScalar *ba = b->a; 154 155 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 156 PetscInt nonew = a->nonew; 157 PetscScalar *ap1,*ap2; 158 159 PetscFunctionBegin; 160 for (i=0; i<m; i++) { 161 if (im[i] < 0) continue; 162 #if defined(PETSC_USE_DEBUG) 163 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 164 #endif 165 if (im[i] >= rstart && im[i] < rend) { 166 row = im[i] - rstart; 167 lastcol1 = -1; 168 rp1 = aj + ai[row]; 169 ap1 = aa + ai[row]; 170 rmax1 = aimax[row]; 171 nrow1 = ailen[row]; 172 low1 = 0; 173 high1 = nrow1; 174 lastcol2 = -1; 175 rp2 = bj + bi[row]; 176 ap2 = ba + bi[row]; 177 rmax2 = bimax[row]; 178 nrow2 = bilen[row]; 179 low2 = 0; 180 high2 = nrow2; 181 182 for (j=0; j<n; j++) { 183 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 184 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 185 if (in[j] >= cstart && in[j] < cend){ 186 col = in[j] - cstart; 187 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 188 } else if (in[j] < 0) continue; 189 #if defined(PETSC_USE_DEBUG) 190 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);} 191 #endif 192 else { 193 if (mat->was_assembled) { 194 if (!aij->colmap) { 195 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 196 } 197 #if defined (PETSC_USE_CTABLE) 198 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 199 col--; 200 #else 201 col = aij->colmap[in[j]] - 1; 202 #endif 203 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 204 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 205 col = in[j]; 206 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 207 B = aij->B; 208 b = (Mat_SeqAIJ*)B->data; 209 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 210 rp2 = bj + bi[row]; 211 ap2 = ba + bi[row]; 212 rmax2 = bimax[row]; 213 nrow2 = bilen[row]; 214 low2 = 0; 215 high2 = nrow2; 216 bm = aij->B->rmap.n; 217 ba = b->a; 218 } 219 } else col = in[j]; 220 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 221 } 222 } 223 } else { 224 if (!aij->donotstash) { 225 if (roworiented) { 226 if (ignorezeroentries && v[i*n] == 0.0) continue; 227 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 228 } else { 229 if (ignorezeroentries && v[i] == 0.0) continue; 230 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 231 } 232 } 233 } 234 } 235 PetscFunctionReturn(0); 236 } 237 238 239 #undef __FUNCT__ 240 #define __FUNCT__ "MatGetValues_MPIAIJ" 241 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 242 { 243 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 244 PetscErrorCode ierr; 245 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 246 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 247 248 PetscFunctionBegin; 249 for (i=0; i<m; i++) { 250 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 251 if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1); 252 if (idxm[i] >= rstart && idxm[i] < rend) { 253 row = idxm[i] - rstart; 254 for (j=0; j<n; j++) { 255 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 256 if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1); 257 if (idxn[j] >= cstart && idxn[j] < cend){ 258 col = idxn[j] - cstart; 259 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 260 } else { 261 if (!aij->colmap) { 262 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 263 } 264 #if defined (PETSC_USE_CTABLE) 265 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 266 col --; 267 #else 268 col = aij->colmap[idxn[j]] - 1; 269 #endif 270 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 271 else { 272 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 273 } 274 } 275 } 276 } else { 277 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 278 } 279 } 280 PetscFunctionReturn(0); 281 } 282 283 #undef __FUNCT__ 284 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 285 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 286 { 287 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 288 PetscErrorCode ierr; 289 PetscInt nstash,reallocs; 290 InsertMode addv; 291 292 PetscFunctionBegin; 293 if (aij->donotstash) { 294 PetscFunctionReturn(0); 295 } 296 297 /* make sure all processors are either in INSERTMODE or ADDMODE */ 298 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 299 if (addv == (ADD_VALUES|INSERT_VALUES)) { 300 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 301 } 302 mat->insertmode = addv; /* in case this processor had no cache */ 303 304 ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr); 305 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 306 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 307 PetscFunctionReturn(0); 308 } 309 310 #undef __FUNCT__ 311 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 312 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 313 { 314 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 315 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 316 PetscErrorCode ierr; 317 PetscMPIInt n; 318 PetscInt i,j,rstart,ncols,flg; 319 PetscInt *row,*col,other_disassembled; 320 PetscScalar *val; 321 InsertMode addv = mat->insertmode; 322 323 PetscFunctionBegin; 324 if (!aij->donotstash) { 325 while (1) { 326 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 327 if (!flg) break; 328 329 for (i=0; i<n;) { 330 /* Now identify the consecutive vals belonging to the same row */ 331 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 332 if (j < n) ncols = j-i; 333 else ncols = n-i; 334 /* Now assemble all these values with a single function call */ 335 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 336 i = j; 337 } 338 } 339 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 340 } 341 a->compressedrow.use = PETSC_FALSE; 342 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 343 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 344 345 /* determine if any processor has disassembled, if so we must 346 also disassemble ourselfs, in order that we may reassemble. */ 347 /* 348 if nonzero structure of submatrix B cannot change then we know that 349 no processor disassembled thus we can skip this stuff 350 */ 351 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 352 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 353 if (mat->was_assembled && !other_disassembled) { 354 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 355 } 356 } 357 /* reaccess the b because aij->B was changed in MatSetValues() or DisAssemble() */ 358 b = (Mat_SeqAIJ *)aij->B->data; 359 360 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 361 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 362 } 363 ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr); 364 b->compressedrow.use = PETSC_TRUE; 365 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 366 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 367 368 if (aij->rowvalues) { 369 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 370 aij->rowvalues = 0; 371 } 372 373 /* used by MatAXPY() */ 374 a->xtoy = 0; b->xtoy = 0; 375 a->XtoY = 0; b->XtoY = 0; 376 377 PetscFunctionReturn(0); 378 } 379 380 #undef __FUNCT__ 381 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 382 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 383 { 384 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 385 PetscErrorCode ierr; 386 387 PetscFunctionBegin; 388 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 389 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 390 PetscFunctionReturn(0); 391 } 392 393 #undef __FUNCT__ 394 #define __FUNCT__ "MatZeroRows_MPIAIJ" 395 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 396 { 397 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 398 PetscErrorCode ierr; 399 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1; 400 PetscInt i,*owners = A->rmap.range; 401 PetscInt *nprocs,j,idx,nsends,row; 402 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 403 PetscInt *rvalues,count,base,slen,*source; 404 PetscInt *lens,*lrows,*values,rstart=A->rmap.rstart; 405 MPI_Comm comm = A->comm; 406 MPI_Request *send_waits,*recv_waits; 407 MPI_Status recv_status,*send_status; 408 #if defined(PETSC_DEBUG) 409 PetscTruth found = PETSC_FALSE; 410 #endif 411 412 PetscFunctionBegin; 413 /* first count number of contributors to each processor */ 414 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 415 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 416 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 417 j = 0; 418 for (i=0; i<N; i++) { 419 if (lastidx > (idx = rows[i])) j = 0; 420 lastidx = idx; 421 for (; j<size; j++) { 422 if (idx >= owners[j] && idx < owners[j+1]) { 423 nprocs[2*j]++; 424 nprocs[2*j+1] = 1; 425 owner[i] = j; 426 #if defined(PETSC_DEBUG) 427 found = PETSC_TRUE; 428 #endif 429 break; 430 } 431 } 432 #if defined(PETSC_DEBUG) 433 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 434 found = PETSC_FALSE; 435 #endif 436 } 437 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 438 439 /* inform other processors of number of messages and max length*/ 440 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 441 442 /* post receives: */ 443 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 444 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 445 for (i=0; i<nrecvs; i++) { 446 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 447 } 448 449 /* do sends: 450 1) starts[i] gives the starting index in svalues for stuff going to 451 the ith processor 452 */ 453 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 454 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 455 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 456 starts[0] = 0; 457 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 458 for (i=0; i<N; i++) { 459 svalues[starts[owner[i]]++] = rows[i]; 460 } 461 462 starts[0] = 0; 463 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 464 count = 0; 465 for (i=0; i<size; i++) { 466 if (nprocs[2*i+1]) { 467 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 468 } 469 } 470 ierr = PetscFree(starts);CHKERRQ(ierr); 471 472 base = owners[rank]; 473 474 /* wait on receives */ 475 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 476 source = lens + nrecvs; 477 count = nrecvs; slen = 0; 478 while (count) { 479 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 480 /* unpack receives into our local space */ 481 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 482 source[imdex] = recv_status.MPI_SOURCE; 483 lens[imdex] = n; 484 slen += n; 485 count--; 486 } 487 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 488 489 /* move the data into the send scatter */ 490 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 491 count = 0; 492 for (i=0; i<nrecvs; i++) { 493 values = rvalues + i*nmax; 494 for (j=0; j<lens[i]; j++) { 495 lrows[count++] = values[j] - base; 496 } 497 } 498 ierr = PetscFree(rvalues);CHKERRQ(ierr); 499 ierr = PetscFree(lens);CHKERRQ(ierr); 500 ierr = PetscFree(owner);CHKERRQ(ierr); 501 ierr = PetscFree(nprocs);CHKERRQ(ierr); 502 503 /* actually zap the local rows */ 504 /* 505 Zero the required rows. If the "diagonal block" of the matrix 506 is square and the user wishes to set the diagonal we use separate 507 code so that MatSetValues() is not called for each diagonal allocating 508 new memory, thus calling lots of mallocs and slowing things down. 509 510 Contributed by: Matthew Knepley 511 */ 512 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 513 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 514 if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) { 515 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 516 } else if (diag != 0.0) { 517 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 518 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 519 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 520 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 521 } 522 for (i = 0; i < slen; i++) { 523 row = lrows[i] + rstart; 524 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 525 } 526 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 527 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 528 } else { 529 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 530 } 531 ierr = PetscFree(lrows);CHKERRQ(ierr); 532 533 /* wait on sends */ 534 if (nsends) { 535 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 536 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 537 ierr = PetscFree(send_status);CHKERRQ(ierr); 538 } 539 ierr = PetscFree(send_waits);CHKERRQ(ierr); 540 ierr = PetscFree(svalues);CHKERRQ(ierr); 541 542 PetscFunctionReturn(0); 543 } 544 545 #undef __FUNCT__ 546 #define __FUNCT__ "MatMult_MPIAIJ" 547 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 548 { 549 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 550 PetscErrorCode ierr; 551 PetscInt nt; 552 553 PetscFunctionBegin; 554 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 555 if (nt != A->cmap.n) { 556 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt); 557 } 558 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 559 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 560 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 561 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 562 PetscFunctionReturn(0); 563 } 564 565 #undef __FUNCT__ 566 #define __FUNCT__ "MatMultAdd_MPIAIJ" 567 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 568 { 569 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 570 PetscErrorCode ierr; 571 572 PetscFunctionBegin; 573 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 574 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 575 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 576 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 577 PetscFunctionReturn(0); 578 } 579 580 #undef __FUNCT__ 581 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 582 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 583 { 584 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 585 PetscErrorCode ierr; 586 PetscTruth merged; 587 588 PetscFunctionBegin; 589 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 590 /* do nondiagonal part */ 591 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 592 if (!merged) { 593 /* send it on its way */ 594 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 595 /* do local part */ 596 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 597 /* receive remote parts: note this assumes the values are not actually */ 598 /* added in yy until the next line, */ 599 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 600 } else { 601 /* do local part */ 602 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 603 /* send it on its way */ 604 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 605 /* values actually were received in the Begin() but we need to call this nop */ 606 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 607 } 608 PetscFunctionReturn(0); 609 } 610 611 EXTERN_C_BEGIN 612 #undef __FUNCT__ 613 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 614 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 615 { 616 MPI_Comm comm; 617 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 618 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 619 IS Me,Notme; 620 PetscErrorCode ierr; 621 PetscInt M,N,first,last,*notme,i; 622 PetscMPIInt size; 623 624 PetscFunctionBegin; 625 626 /* Easy test: symmetric diagonal block */ 627 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 628 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 629 if (!*f) PetscFunctionReturn(0); 630 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 631 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 632 if (size == 1) PetscFunctionReturn(0); 633 634 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 635 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 636 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 637 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 638 for (i=0; i<first; i++) notme[i] = i; 639 for (i=last; i<M; i++) notme[i-last+first] = i; 640 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 641 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 642 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 643 Aoff = Aoffs[0]; 644 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 645 Boff = Boffs[0]; 646 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 647 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 648 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 649 ierr = ISDestroy(Me);CHKERRQ(ierr); 650 ierr = ISDestroy(Notme);CHKERRQ(ierr); 651 652 PetscFunctionReturn(0); 653 } 654 EXTERN_C_END 655 656 #undef __FUNCT__ 657 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 658 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 659 { 660 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 661 PetscErrorCode ierr; 662 663 PetscFunctionBegin; 664 /* do nondiagonal part */ 665 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 666 /* send it on its way */ 667 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 668 /* do local part */ 669 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 670 /* receive remote parts */ 671 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 672 PetscFunctionReturn(0); 673 } 674 675 /* 676 This only works correctly for square matrices where the subblock A->A is the 677 diagonal block 678 */ 679 #undef __FUNCT__ 680 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 681 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 682 { 683 PetscErrorCode ierr; 684 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 685 686 PetscFunctionBegin; 687 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 688 if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) { 689 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 690 } 691 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 692 PetscFunctionReturn(0); 693 } 694 695 #undef __FUNCT__ 696 #define __FUNCT__ "MatScale_MPIAIJ" 697 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 698 { 699 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 700 PetscErrorCode ierr; 701 702 PetscFunctionBegin; 703 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 704 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 705 PetscFunctionReturn(0); 706 } 707 708 #undef __FUNCT__ 709 #define __FUNCT__ "MatDestroy_MPIAIJ" 710 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 711 { 712 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 713 PetscErrorCode ierr; 714 715 PetscFunctionBegin; 716 #if defined(PETSC_USE_LOG) 717 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N); 718 #endif 719 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 720 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 721 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 722 #if defined (PETSC_USE_CTABLE) 723 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 724 #else 725 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 726 #endif 727 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 728 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 729 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 730 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 731 ierr = PetscFree(aij);CHKERRQ(ierr); 732 733 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 734 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 735 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 736 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 737 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 738 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 739 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 740 PetscFunctionReturn(0); 741 } 742 743 #undef __FUNCT__ 744 #define __FUNCT__ "MatView_MPIAIJ_Binary" 745 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 746 { 747 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 748 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 749 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 750 PetscErrorCode ierr; 751 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 752 int fd; 753 PetscInt nz,header[4],*row_lengths,*range,rlen,i; 754 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz; 755 PetscScalar *column_values; 756 757 PetscFunctionBegin; 758 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 759 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 760 nz = A->nz + B->nz; 761 if (!rank) { 762 header[0] = MAT_FILE_COOKIE; 763 header[1] = mat->rmap.N; 764 header[2] = mat->cmap.N; 765 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 766 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 767 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 768 /* get largest number of rows any processor has */ 769 rlen = mat->rmap.n; 770 range = mat->rmap.range; 771 for (i=1; i<size; i++) { 772 rlen = PetscMax(rlen,range[i+1] - range[i]); 773 } 774 } else { 775 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 776 rlen = mat->rmap.n; 777 } 778 779 /* load up the local row counts */ 780 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 781 for (i=0; i<mat->rmap.n; i++) { 782 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 783 } 784 785 /* store the row lengths to the file */ 786 if (!rank) { 787 MPI_Status status; 788 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 789 for (i=1; i<size; i++) { 790 rlen = range[i+1] - range[i]; 791 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 792 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 793 } 794 } else { 795 ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 796 } 797 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 798 799 /* load up the local column indices */ 800 nzmax = nz; /* )th processor needs space a largest processor needs */ 801 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 802 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 803 cnt = 0; 804 for (i=0; i<mat->rmap.n; i++) { 805 for (j=B->i[i]; j<B->i[i+1]; j++) { 806 if ( (col = garray[B->j[j]]) > cstart) break; 807 column_indices[cnt++] = col; 808 } 809 for (k=A->i[i]; k<A->i[i+1]; k++) { 810 column_indices[cnt++] = A->j[k] + cstart; 811 } 812 for (; j<B->i[i+1]; j++) { 813 column_indices[cnt++] = garray[B->j[j]]; 814 } 815 } 816 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 817 818 /* store the column indices to the file */ 819 if (!rank) { 820 MPI_Status status; 821 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 822 for (i=1; i<size; i++) { 823 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 824 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 825 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 826 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 827 } 828 } else { 829 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 830 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 831 } 832 ierr = PetscFree(column_indices);CHKERRQ(ierr); 833 834 /* load up the local column values */ 835 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 836 cnt = 0; 837 for (i=0; i<mat->rmap.n; i++) { 838 for (j=B->i[i]; j<B->i[i+1]; j++) { 839 if ( garray[B->j[j]] > cstart) break; 840 column_values[cnt++] = B->a[j]; 841 } 842 for (k=A->i[i]; k<A->i[i+1]; k++) { 843 column_values[cnt++] = A->a[k]; 844 } 845 for (; j<B->i[i+1]; j++) { 846 column_values[cnt++] = B->a[j]; 847 } 848 } 849 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 850 851 /* store the column values to the file */ 852 if (!rank) { 853 MPI_Status status; 854 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 855 for (i=1; i<size; i++) { 856 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 857 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 858 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 859 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 860 } 861 } else { 862 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 863 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 864 } 865 ierr = PetscFree(column_values);CHKERRQ(ierr); 866 PetscFunctionReturn(0); 867 } 868 869 #undef __FUNCT__ 870 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 871 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 872 { 873 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 874 PetscErrorCode ierr; 875 PetscMPIInt rank = aij->rank,size = aij->size; 876 PetscTruth isdraw,iascii,isbinary; 877 PetscViewer sviewer; 878 PetscViewerFormat format; 879 880 PetscFunctionBegin; 881 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 882 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 883 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 884 if (iascii) { 885 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 886 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 887 MatInfo info; 888 PetscTruth inodes; 889 890 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 891 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 892 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 893 if (!inodes) { 894 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 895 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 896 } else { 897 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 898 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 899 } 900 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 901 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 902 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 903 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 904 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 905 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 906 PetscFunctionReturn(0); 907 } else if (format == PETSC_VIEWER_ASCII_INFO) { 908 PetscInt inodecount,inodelimit,*inodes; 909 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 910 if (inodes) { 911 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 912 } else { 913 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 914 } 915 PetscFunctionReturn(0); 916 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 917 PetscFunctionReturn(0); 918 } 919 } else if (isbinary) { 920 if (size == 1) { 921 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 922 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 923 } else { 924 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 925 } 926 PetscFunctionReturn(0); 927 } else if (isdraw) { 928 PetscDraw draw; 929 PetscTruth isnull; 930 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 931 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 932 } 933 934 if (size == 1) { 935 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 936 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 937 } else { 938 /* assemble the entire matrix onto first processor. */ 939 Mat A; 940 Mat_SeqAIJ *Aloc; 941 PetscInt M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct; 942 PetscScalar *a; 943 944 ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 945 if (!rank) { 946 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 947 } else { 948 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 949 } 950 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 951 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 952 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 953 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 954 955 /* copy over the A part */ 956 Aloc = (Mat_SeqAIJ*)aij->A->data; 957 m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 958 row = mat->rmap.rstart; 959 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;} 960 for (i=0; i<m; i++) { 961 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 962 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 963 } 964 aj = Aloc->j; 965 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;} 966 967 /* copy over the B part */ 968 Aloc = (Mat_SeqAIJ*)aij->B->data; 969 m = aij->B->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 970 row = mat->rmap.rstart; 971 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 972 ct = cols; 973 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 974 for (i=0; i<m; i++) { 975 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 976 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 977 } 978 ierr = PetscFree(ct);CHKERRQ(ierr); 979 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 980 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 981 /* 982 Everyone has to call to draw the matrix since the graphics waits are 983 synchronized across all processors that share the PetscDraw object 984 */ 985 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 986 if (!rank) { 987 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 988 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 989 } 990 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 991 ierr = MatDestroy(A);CHKERRQ(ierr); 992 } 993 PetscFunctionReturn(0); 994 } 995 996 #undef __FUNCT__ 997 #define __FUNCT__ "MatView_MPIAIJ" 998 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 999 { 1000 PetscErrorCode ierr; 1001 PetscTruth iascii,isdraw,issocket,isbinary; 1002 1003 PetscFunctionBegin; 1004 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1005 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1006 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1007 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1008 if (iascii || isdraw || isbinary || issocket) { 1009 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1010 } else { 1011 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1012 } 1013 PetscFunctionReturn(0); 1014 } 1015 1016 1017 1018 #undef __FUNCT__ 1019 #define __FUNCT__ "MatRelax_MPIAIJ" 1020 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1021 { 1022 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1023 PetscErrorCode ierr; 1024 Vec bb1; 1025 1026 PetscFunctionBegin; 1027 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1028 1029 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1030 1031 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1032 if (flag & SOR_ZERO_INITIAL_GUESS) { 1033 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1034 its--; 1035 } 1036 1037 while (its--) { 1038 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1039 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1040 1041 /* update rhs: bb1 = bb - B*x */ 1042 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1043 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1044 1045 /* local sweep */ 1046 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1047 CHKERRQ(ierr); 1048 } 1049 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1050 if (flag & SOR_ZERO_INITIAL_GUESS) { 1051 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1052 its--; 1053 } 1054 while (its--) { 1055 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1056 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1057 1058 /* update rhs: bb1 = bb - B*x */ 1059 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1060 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1061 1062 /* local sweep */ 1063 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1064 CHKERRQ(ierr); 1065 } 1066 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1067 if (flag & SOR_ZERO_INITIAL_GUESS) { 1068 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1069 its--; 1070 } 1071 while (its--) { 1072 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1073 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1074 1075 /* update rhs: bb1 = bb - B*x */ 1076 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1077 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1078 1079 /* local sweep */ 1080 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1081 CHKERRQ(ierr); 1082 } 1083 } else { 1084 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1085 } 1086 1087 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1088 PetscFunctionReturn(0); 1089 } 1090 1091 #undef __FUNCT__ 1092 #define __FUNCT__ "MatPermute_MPIAIJ" 1093 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1094 { 1095 MPI_Comm comm,pcomm; 1096 PetscInt first,local_size,nrows,*rows; 1097 int ntids; 1098 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1099 PetscErrorCode ierr; 1100 1101 PetscFunctionBegin; 1102 ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr); 1103 /* make a collective version of 'rowp' */ 1104 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr); 1105 if (pcomm==comm) { 1106 crowp = rowp; 1107 } else { 1108 ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr); 1109 ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr); 1110 ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr); 1111 ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr); 1112 } 1113 /* collect the global row permutation and invert it */ 1114 ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr); 1115 ierr = ISSetPermutation(growp); CHKERRQ(ierr); 1116 if (pcomm!=comm) { 1117 ierr = ISDestroy(crowp); CHKERRQ(ierr); 1118 } 1119 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1120 /* get the local target indices */ 1121 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr); 1122 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr); 1123 ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr); 1124 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr); 1125 ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr); 1126 ierr = ISDestroy(irowp); CHKERRQ(ierr); 1127 /* the column permutation is so much easier; 1128 make a local version of 'colp' and invert it */ 1129 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr); 1130 ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr); 1131 if (ntids==1) { 1132 lcolp = colp; 1133 } else { 1134 ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr); 1135 ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr); 1136 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr); 1137 } 1138 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr); 1139 ierr = ISSetPermutation(lcolp); CHKERRQ(ierr); 1140 if (ntids>1) { 1141 ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr); 1142 ierr = ISDestroy(lcolp); CHKERRQ(ierr); 1143 } 1144 /* now we just get the submatrix */ 1145 ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr); 1146 /* clean up */ 1147 ierr = ISDestroy(lrowp); CHKERRQ(ierr); 1148 ierr = ISDestroy(icolp); CHKERRQ(ierr); 1149 PetscFunctionReturn(0); 1150 } 1151 1152 #undef __FUNCT__ 1153 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1154 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1155 { 1156 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1157 Mat A = mat->A,B = mat->B; 1158 PetscErrorCode ierr; 1159 PetscReal isend[5],irecv[5]; 1160 1161 PetscFunctionBegin; 1162 info->block_size = 1.0; 1163 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1164 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1165 isend[3] = info->memory; isend[4] = info->mallocs; 1166 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1167 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1168 isend[3] += info->memory; isend[4] += info->mallocs; 1169 if (flag == MAT_LOCAL) { 1170 info->nz_used = isend[0]; 1171 info->nz_allocated = isend[1]; 1172 info->nz_unneeded = isend[2]; 1173 info->memory = isend[3]; 1174 info->mallocs = isend[4]; 1175 } else if (flag == MAT_GLOBAL_MAX) { 1176 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1177 info->nz_used = irecv[0]; 1178 info->nz_allocated = irecv[1]; 1179 info->nz_unneeded = irecv[2]; 1180 info->memory = irecv[3]; 1181 info->mallocs = irecv[4]; 1182 } else if (flag == MAT_GLOBAL_SUM) { 1183 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1184 info->nz_used = irecv[0]; 1185 info->nz_allocated = irecv[1]; 1186 info->nz_unneeded = irecv[2]; 1187 info->memory = irecv[3]; 1188 info->mallocs = irecv[4]; 1189 } 1190 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1191 info->fill_ratio_needed = 0; 1192 info->factor_mallocs = 0; 1193 info->rows_global = (double)matin->rmap.N; 1194 info->columns_global = (double)matin->cmap.N; 1195 info->rows_local = (double)matin->rmap.n; 1196 info->columns_local = (double)matin->cmap.N; 1197 1198 PetscFunctionReturn(0); 1199 } 1200 1201 #undef __FUNCT__ 1202 #define __FUNCT__ "MatSetOption_MPIAIJ" 1203 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op) 1204 { 1205 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1206 PetscErrorCode ierr; 1207 1208 PetscFunctionBegin; 1209 switch (op) { 1210 case MAT_NO_NEW_NONZERO_LOCATIONS: 1211 case MAT_YES_NEW_NONZERO_LOCATIONS: 1212 case MAT_COLUMNS_UNSORTED: 1213 case MAT_COLUMNS_SORTED: 1214 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1215 case MAT_KEEP_ZEROED_ROWS: 1216 case MAT_NEW_NONZERO_LOCATION_ERR: 1217 case MAT_USE_INODES: 1218 case MAT_DO_NOT_USE_INODES: 1219 case MAT_IGNORE_ZERO_ENTRIES: 1220 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1221 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1222 break; 1223 case MAT_ROW_ORIENTED: 1224 a->roworiented = PETSC_TRUE; 1225 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1226 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1227 break; 1228 case MAT_ROWS_SORTED: 1229 case MAT_ROWS_UNSORTED: 1230 case MAT_YES_NEW_DIAGONALS: 1231 ierr = PetscInfo(A,"Option ignored\n");CHKERRQ(ierr); 1232 break; 1233 case MAT_COLUMN_ORIENTED: 1234 a->roworiented = PETSC_FALSE; 1235 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1236 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1237 break; 1238 case MAT_IGNORE_OFF_PROC_ENTRIES: 1239 a->donotstash = PETSC_TRUE; 1240 break; 1241 case MAT_NO_NEW_DIAGONALS: 1242 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1243 case MAT_SYMMETRIC: 1244 case MAT_STRUCTURALLY_SYMMETRIC: 1245 case MAT_HERMITIAN: 1246 case MAT_SYMMETRY_ETERNAL: 1247 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1248 break; 1249 case MAT_NOT_SYMMETRIC: 1250 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1251 case MAT_NOT_HERMITIAN: 1252 case MAT_NOT_SYMMETRY_ETERNAL: 1253 break; 1254 default: 1255 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1256 } 1257 PetscFunctionReturn(0); 1258 } 1259 1260 #undef __FUNCT__ 1261 #define __FUNCT__ "MatGetRow_MPIAIJ" 1262 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1263 { 1264 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1265 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1266 PetscErrorCode ierr; 1267 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart; 1268 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend; 1269 PetscInt *cmap,*idx_p; 1270 1271 PetscFunctionBegin; 1272 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1273 mat->getrowactive = PETSC_TRUE; 1274 1275 if (!mat->rowvalues && (idx || v)) { 1276 /* 1277 allocate enough space to hold information from the longest row. 1278 */ 1279 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1280 PetscInt max = 1,tmp; 1281 for (i=0; i<matin->rmap.n; i++) { 1282 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1283 if (max < tmp) { max = tmp; } 1284 } 1285 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1286 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1287 } 1288 1289 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1290 lrow = row - rstart; 1291 1292 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1293 if (!v) {pvA = 0; pvB = 0;} 1294 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1295 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1296 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1297 nztot = nzA + nzB; 1298 1299 cmap = mat->garray; 1300 if (v || idx) { 1301 if (nztot) { 1302 /* Sort by increasing column numbers, assuming A and B already sorted */ 1303 PetscInt imark = -1; 1304 if (v) { 1305 *v = v_p = mat->rowvalues; 1306 for (i=0; i<nzB; i++) { 1307 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1308 else break; 1309 } 1310 imark = i; 1311 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1312 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1313 } 1314 if (idx) { 1315 *idx = idx_p = mat->rowindices; 1316 if (imark > -1) { 1317 for (i=0; i<imark; i++) { 1318 idx_p[i] = cmap[cworkB[i]]; 1319 } 1320 } else { 1321 for (i=0; i<nzB; i++) { 1322 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1323 else break; 1324 } 1325 imark = i; 1326 } 1327 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1328 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1329 } 1330 } else { 1331 if (idx) *idx = 0; 1332 if (v) *v = 0; 1333 } 1334 } 1335 *nz = nztot; 1336 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1337 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1338 PetscFunctionReturn(0); 1339 } 1340 1341 #undef __FUNCT__ 1342 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1343 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1344 { 1345 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1346 1347 PetscFunctionBegin; 1348 if (!aij->getrowactive) { 1349 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1350 } 1351 aij->getrowactive = PETSC_FALSE; 1352 PetscFunctionReturn(0); 1353 } 1354 1355 #undef __FUNCT__ 1356 #define __FUNCT__ "MatNorm_MPIAIJ" 1357 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1358 { 1359 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1360 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1361 PetscErrorCode ierr; 1362 PetscInt i,j,cstart = mat->cmap.rstart; 1363 PetscReal sum = 0.0; 1364 PetscScalar *v; 1365 1366 PetscFunctionBegin; 1367 if (aij->size == 1) { 1368 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1369 } else { 1370 if (type == NORM_FROBENIUS) { 1371 v = amat->a; 1372 for (i=0; i<amat->nz; i++) { 1373 #if defined(PETSC_USE_COMPLEX) 1374 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1375 #else 1376 sum += (*v)*(*v); v++; 1377 #endif 1378 } 1379 v = bmat->a; 1380 for (i=0; i<bmat->nz; i++) { 1381 #if defined(PETSC_USE_COMPLEX) 1382 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1383 #else 1384 sum += (*v)*(*v); v++; 1385 #endif 1386 } 1387 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1388 *norm = sqrt(*norm); 1389 } else if (type == NORM_1) { /* max column norm */ 1390 PetscReal *tmp,*tmp2; 1391 PetscInt *jj,*garray = aij->garray; 1392 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1393 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1394 ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 1395 *norm = 0.0; 1396 v = amat->a; jj = amat->j; 1397 for (j=0; j<amat->nz; j++) { 1398 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1399 } 1400 v = bmat->a; jj = bmat->j; 1401 for (j=0; j<bmat->nz; j++) { 1402 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1403 } 1404 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1405 for (j=0; j<mat->cmap.N; j++) { 1406 if (tmp2[j] > *norm) *norm = tmp2[j]; 1407 } 1408 ierr = PetscFree(tmp);CHKERRQ(ierr); 1409 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1410 } else if (type == NORM_INFINITY) { /* max row norm */ 1411 PetscReal ntemp = 0.0; 1412 for (j=0; j<aij->A->rmap.n; j++) { 1413 v = amat->a + amat->i[j]; 1414 sum = 0.0; 1415 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1416 sum += PetscAbsScalar(*v); v++; 1417 } 1418 v = bmat->a + bmat->i[j]; 1419 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1420 sum += PetscAbsScalar(*v); v++; 1421 } 1422 if (sum > ntemp) ntemp = sum; 1423 } 1424 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1425 } else { 1426 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1427 } 1428 } 1429 PetscFunctionReturn(0); 1430 } 1431 1432 #undef __FUNCT__ 1433 #define __FUNCT__ "MatTranspose_MPIAIJ" 1434 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1435 { 1436 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1437 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1438 PetscErrorCode ierr; 1439 PetscInt M = A->rmap.N,N = A->cmap.N,m,*ai,*aj,row,*cols,i,*ct; 1440 Mat B; 1441 PetscScalar *array; 1442 1443 PetscFunctionBegin; 1444 if (!matout && M != N) { 1445 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1446 } 1447 1448 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 1449 ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr); 1450 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1451 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1452 1453 /* copy over the A part */ 1454 Aloc = (Mat_SeqAIJ*)a->A->data; 1455 m = a->A->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1456 row = A->rmap.rstart; 1457 for (i=0; i<ai[m]; i++) {aj[i] += A->cmap.rstart ;} 1458 for (i=0; i<m; i++) { 1459 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1460 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1461 } 1462 aj = Aloc->j; 1463 for (i=0; i<ai[m]; i++) {aj[i] -= A->cmap.rstart ;} 1464 1465 /* copy over the B part */ 1466 Aloc = (Mat_SeqAIJ*)a->B->data; 1467 m = a->B->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1468 row = A->rmap.rstart; 1469 ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1470 ct = cols; 1471 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1472 for (i=0; i<m; i++) { 1473 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1474 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1475 } 1476 ierr = PetscFree(ct);CHKERRQ(ierr); 1477 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1478 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1479 if (matout) { 1480 *matout = B; 1481 } else { 1482 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1483 } 1484 PetscFunctionReturn(0); 1485 } 1486 1487 #undef __FUNCT__ 1488 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1489 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1490 { 1491 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1492 Mat a = aij->A,b = aij->B; 1493 PetscErrorCode ierr; 1494 PetscInt s1,s2,s3; 1495 1496 PetscFunctionBegin; 1497 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1498 if (rr) { 1499 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1500 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1501 /* Overlap communication with computation. */ 1502 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1503 } 1504 if (ll) { 1505 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1506 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1507 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1508 } 1509 /* scale the diagonal block */ 1510 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1511 1512 if (rr) { 1513 /* Do a scatter end and then right scale the off-diagonal block */ 1514 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1515 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1516 } 1517 1518 PetscFunctionReturn(0); 1519 } 1520 1521 1522 #undef __FUNCT__ 1523 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1524 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A) 1525 { 1526 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1527 PetscErrorCode ierr; 1528 1529 PetscFunctionBegin; 1530 if (!a->rank) { 1531 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1532 } 1533 PetscFunctionReturn(0); 1534 } 1535 1536 #undef __FUNCT__ 1537 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1538 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1539 { 1540 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1541 PetscErrorCode ierr; 1542 1543 PetscFunctionBegin; 1544 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1545 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1546 PetscFunctionReturn(0); 1547 } 1548 #undef __FUNCT__ 1549 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1550 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1551 { 1552 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1553 PetscErrorCode ierr; 1554 1555 PetscFunctionBegin; 1556 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1557 PetscFunctionReturn(0); 1558 } 1559 1560 #undef __FUNCT__ 1561 #define __FUNCT__ "MatEqual_MPIAIJ" 1562 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1563 { 1564 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1565 Mat a,b,c,d; 1566 PetscTruth flg; 1567 PetscErrorCode ierr; 1568 1569 PetscFunctionBegin; 1570 a = matA->A; b = matA->B; 1571 c = matB->A; d = matB->B; 1572 1573 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1574 if (flg) { 1575 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1576 } 1577 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1578 PetscFunctionReturn(0); 1579 } 1580 1581 #undef __FUNCT__ 1582 #define __FUNCT__ "MatCopy_MPIAIJ" 1583 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1584 { 1585 PetscErrorCode ierr; 1586 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1587 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1588 1589 PetscFunctionBegin; 1590 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1591 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1592 /* because of the column compression in the off-processor part of the matrix a->B, 1593 the number of columns in a->B and b->B may be different, hence we cannot call 1594 the MatCopy() directly on the two parts. If need be, we can provide a more 1595 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1596 then copying the submatrices */ 1597 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1598 } else { 1599 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1600 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1601 } 1602 PetscFunctionReturn(0); 1603 } 1604 1605 #undef __FUNCT__ 1606 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1607 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1608 { 1609 PetscErrorCode ierr; 1610 1611 PetscFunctionBegin; 1612 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1613 PetscFunctionReturn(0); 1614 } 1615 1616 #include "petscblaslapack.h" 1617 #undef __FUNCT__ 1618 #define __FUNCT__ "MatAXPY_MPIAIJ" 1619 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1620 { 1621 PetscErrorCode ierr; 1622 PetscInt i; 1623 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1624 PetscBLASInt bnz,one=1; 1625 Mat_SeqAIJ *x,*y; 1626 1627 PetscFunctionBegin; 1628 if (str == SAME_NONZERO_PATTERN) { 1629 PetscScalar alpha = a; 1630 x = (Mat_SeqAIJ *)xx->A->data; 1631 y = (Mat_SeqAIJ *)yy->A->data; 1632 bnz = (PetscBLASInt)x->nz; 1633 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1634 x = (Mat_SeqAIJ *)xx->B->data; 1635 y = (Mat_SeqAIJ *)yy->B->data; 1636 bnz = (PetscBLASInt)x->nz; 1637 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1638 } else if (str == SUBSET_NONZERO_PATTERN) { 1639 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1640 1641 x = (Mat_SeqAIJ *)xx->B->data; 1642 y = (Mat_SeqAIJ *)yy->B->data; 1643 if (y->xtoy && y->XtoY != xx->B) { 1644 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1645 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1646 } 1647 if (!y->xtoy) { /* get xtoy */ 1648 ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1649 y->XtoY = xx->B; 1650 } 1651 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1652 } else { 1653 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1654 } 1655 PetscFunctionReturn(0); 1656 } 1657 1658 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1659 1660 #undef __FUNCT__ 1661 #define __FUNCT__ "MatConjugate_MPIAIJ" 1662 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1663 { 1664 #if defined(PETSC_USE_COMPLEX) 1665 PetscErrorCode ierr; 1666 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1667 1668 PetscFunctionBegin; 1669 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1670 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1671 #else 1672 PetscFunctionBegin; 1673 #endif 1674 PetscFunctionReturn(0); 1675 } 1676 1677 #undef __FUNCT__ 1678 #define __FUNCT__ "MatRealPart_MPIAIJ" 1679 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1680 { 1681 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1682 PetscErrorCode ierr; 1683 1684 PetscFunctionBegin; 1685 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1686 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1687 PetscFunctionReturn(0); 1688 } 1689 1690 #undef __FUNCT__ 1691 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1692 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1693 { 1694 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1695 PetscErrorCode ierr; 1696 1697 PetscFunctionBegin; 1698 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1699 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1700 PetscFunctionReturn(0); 1701 } 1702 1703 /* -------------------------------------------------------------------*/ 1704 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1705 MatGetRow_MPIAIJ, 1706 MatRestoreRow_MPIAIJ, 1707 MatMult_MPIAIJ, 1708 /* 4*/ MatMultAdd_MPIAIJ, 1709 MatMultTranspose_MPIAIJ, 1710 MatMultTransposeAdd_MPIAIJ, 1711 0, 1712 0, 1713 0, 1714 /*10*/ 0, 1715 0, 1716 0, 1717 MatRelax_MPIAIJ, 1718 MatTranspose_MPIAIJ, 1719 /*15*/ MatGetInfo_MPIAIJ, 1720 MatEqual_MPIAIJ, 1721 MatGetDiagonal_MPIAIJ, 1722 MatDiagonalScale_MPIAIJ, 1723 MatNorm_MPIAIJ, 1724 /*20*/ MatAssemblyBegin_MPIAIJ, 1725 MatAssemblyEnd_MPIAIJ, 1726 0, 1727 MatSetOption_MPIAIJ, 1728 MatZeroEntries_MPIAIJ, 1729 /*25*/ MatZeroRows_MPIAIJ, 1730 0, 1731 0, 1732 0, 1733 0, 1734 /*30*/ MatSetUpPreallocation_MPIAIJ, 1735 0, 1736 0, 1737 0, 1738 0, 1739 /*35*/ MatDuplicate_MPIAIJ, 1740 0, 1741 0, 1742 0, 1743 0, 1744 /*40*/ MatAXPY_MPIAIJ, 1745 MatGetSubMatrices_MPIAIJ, 1746 MatIncreaseOverlap_MPIAIJ, 1747 MatGetValues_MPIAIJ, 1748 MatCopy_MPIAIJ, 1749 /*45*/ MatPrintHelp_MPIAIJ, 1750 MatScale_MPIAIJ, 1751 0, 1752 0, 1753 0, 1754 /*50*/ MatSetBlockSize_MPIAIJ, 1755 0, 1756 0, 1757 0, 1758 0, 1759 /*55*/ MatFDColoringCreate_MPIAIJ, 1760 0, 1761 MatSetUnfactored_MPIAIJ, 1762 MatPermute_MPIAIJ, 1763 0, 1764 /*60*/ MatGetSubMatrix_MPIAIJ, 1765 MatDestroy_MPIAIJ, 1766 MatView_MPIAIJ, 1767 0, 1768 0, 1769 /*65*/ 0, 1770 0, 1771 0, 1772 0, 1773 0, 1774 /*70*/ 0, 1775 0, 1776 MatSetColoring_MPIAIJ, 1777 #if defined(PETSC_HAVE_ADIC) 1778 MatSetValuesAdic_MPIAIJ, 1779 #else 1780 0, 1781 #endif 1782 MatSetValuesAdifor_MPIAIJ, 1783 /*75*/ 0, 1784 0, 1785 0, 1786 0, 1787 0, 1788 /*80*/ 0, 1789 0, 1790 0, 1791 0, 1792 /*84*/ MatLoad_MPIAIJ, 1793 0, 1794 0, 1795 0, 1796 0, 1797 0, 1798 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1799 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1800 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1801 MatPtAP_Basic, 1802 MatPtAPSymbolic_MPIAIJ, 1803 /*95*/ MatPtAPNumeric_MPIAIJ, 1804 0, 1805 0, 1806 0, 1807 0, 1808 /*100*/0, 1809 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1810 MatPtAPNumeric_MPIAIJ_MPIAIJ, 1811 MatConjugate_MPIAIJ, 1812 0, 1813 /*105*/MatSetValuesRow_MPIAIJ, 1814 MatRealPart_MPIAIJ, 1815 MatImaginaryPart_MPIAIJ}; 1816 1817 /* ----------------------------------------------------------------------------------------*/ 1818 1819 EXTERN_C_BEGIN 1820 #undef __FUNCT__ 1821 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1822 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 1823 { 1824 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1825 PetscErrorCode ierr; 1826 1827 PetscFunctionBegin; 1828 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1829 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1830 PetscFunctionReturn(0); 1831 } 1832 EXTERN_C_END 1833 1834 EXTERN_C_BEGIN 1835 #undef __FUNCT__ 1836 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1837 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 1838 { 1839 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1840 PetscErrorCode ierr; 1841 1842 PetscFunctionBegin; 1843 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1844 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1845 PetscFunctionReturn(0); 1846 } 1847 EXTERN_C_END 1848 1849 #include "petscpc.h" 1850 EXTERN_C_BEGIN 1851 #undef __FUNCT__ 1852 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1853 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1854 { 1855 Mat_MPIAIJ *b; 1856 PetscErrorCode ierr; 1857 PetscInt i; 1858 1859 PetscFunctionBegin; 1860 B->preallocated = PETSC_TRUE; 1861 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1862 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1863 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1864 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1865 1866 B->rmap.bs = B->cmap.bs = 1; 1867 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 1868 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 1869 if (d_nnz) { 1870 for (i=0; i<B->rmap.n; i++) { 1871 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1872 } 1873 } 1874 if (o_nnz) { 1875 for (i=0; i<B->rmap.n; i++) { 1876 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1877 } 1878 } 1879 b = (Mat_MPIAIJ*)B->data; 1880 1881 /* Explicitly create 2 MATSEQAIJ matrices. */ 1882 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1883 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 1884 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 1885 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 1886 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1887 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 1888 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 1889 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 1890 1891 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1892 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1893 1894 PetscFunctionReturn(0); 1895 } 1896 EXTERN_C_END 1897 1898 #undef __FUNCT__ 1899 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1900 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1901 { 1902 Mat mat; 1903 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1904 PetscErrorCode ierr; 1905 1906 PetscFunctionBegin; 1907 *newmat = 0; 1908 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 1909 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 1910 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1911 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1912 a = (Mat_MPIAIJ*)mat->data; 1913 1914 mat->factor = matin->factor; 1915 mat->rmap.bs = matin->rmap.bs; 1916 mat->assembled = PETSC_TRUE; 1917 mat->insertmode = NOT_SET_VALUES; 1918 mat->preallocated = PETSC_TRUE; 1919 1920 a->size = oldmat->size; 1921 a->rank = oldmat->rank; 1922 a->donotstash = oldmat->donotstash; 1923 a->roworiented = oldmat->roworiented; 1924 a->rowindices = 0; 1925 a->rowvalues = 0; 1926 a->getrowactive = PETSC_FALSE; 1927 1928 ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 1929 ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 1930 1931 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1932 if (oldmat->colmap) { 1933 #if defined (PETSC_USE_CTABLE) 1934 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1935 #else 1936 ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1937 ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 1938 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 1939 #endif 1940 } else a->colmap = 0; 1941 if (oldmat->garray) { 1942 PetscInt len; 1943 len = oldmat->B->cmap.n; 1944 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1945 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 1946 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 1947 } else a->garray = 0; 1948 1949 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1950 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 1951 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1952 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 1953 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1954 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1955 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1956 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 1957 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1958 *newmat = mat; 1959 PetscFunctionReturn(0); 1960 } 1961 1962 #include "petscsys.h" 1963 1964 #undef __FUNCT__ 1965 #define __FUNCT__ "MatLoad_MPIAIJ" 1966 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat) 1967 { 1968 Mat A; 1969 PetscScalar *vals,*svals; 1970 MPI_Comm comm = ((PetscObject)viewer)->comm; 1971 MPI_Status status; 1972 PetscErrorCode ierr; 1973 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 1974 PetscInt i,nz,j,rstart,rend,mmax; 1975 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 1976 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1977 PetscInt cend,cstart,n,*rowners; 1978 int fd; 1979 1980 PetscFunctionBegin; 1981 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1982 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1983 if (!rank) { 1984 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1985 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1986 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1987 } 1988 1989 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1990 M = header[1]; N = header[2]; 1991 /* determine ownership of all rows */ 1992 m = M/size + ((M % size) > rank); 1993 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1994 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1995 1996 /* First process needs enough room for process with most rows */ 1997 if (!rank) { 1998 mmax = rowners[1]; 1999 for (i=2; i<size; i++) { 2000 mmax = PetscMax(mmax,rowners[i]); 2001 } 2002 } else mmax = m; 2003 2004 rowners[0] = 0; 2005 for (i=2; i<=size; i++) { 2006 mmax = PetscMax(mmax,rowners[i]); 2007 rowners[i] += rowners[i-1]; 2008 } 2009 rstart = rowners[rank]; 2010 rend = rowners[rank+1]; 2011 2012 /* distribute row lengths to all processors */ 2013 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2014 if (!rank) { 2015 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2016 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2017 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2018 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2019 for (j=0; j<m; j++) { 2020 procsnz[0] += ourlens[j]; 2021 } 2022 for (i=1; i<size; i++) { 2023 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2024 /* calculate the number of nonzeros on each processor */ 2025 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2026 procsnz[i] += rowlengths[j]; 2027 } 2028 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2029 } 2030 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2031 } else { 2032 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2033 } 2034 2035 if (!rank) { 2036 /* determine max buffer needed and allocate it */ 2037 maxnz = 0; 2038 for (i=0; i<size; i++) { 2039 maxnz = PetscMax(maxnz,procsnz[i]); 2040 } 2041 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2042 2043 /* read in my part of the matrix column indices */ 2044 nz = procsnz[0]; 2045 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2046 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2047 2048 /* read in every one elses and ship off */ 2049 for (i=1; i<size; i++) { 2050 nz = procsnz[i]; 2051 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2052 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2053 } 2054 ierr = PetscFree(cols);CHKERRQ(ierr); 2055 } else { 2056 /* determine buffer space needed for message */ 2057 nz = 0; 2058 for (i=0; i<m; i++) { 2059 nz += ourlens[i]; 2060 } 2061 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2062 2063 /* receive message of column indices*/ 2064 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2065 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2066 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2067 } 2068 2069 /* determine column ownership if matrix is not square */ 2070 if (N != M) { 2071 n = N/size + ((N % size) > rank); 2072 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2073 cstart = cend - n; 2074 } else { 2075 cstart = rstart; 2076 cend = rend; 2077 n = cend - cstart; 2078 } 2079 2080 /* loop over local rows, determining number of off diagonal entries */ 2081 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2082 jj = 0; 2083 for (i=0; i<m; i++) { 2084 for (j=0; j<ourlens[i]; j++) { 2085 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2086 jj++; 2087 } 2088 } 2089 2090 /* create our matrix */ 2091 for (i=0; i<m; i++) { 2092 ourlens[i] -= offlens[i]; 2093 } 2094 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2095 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2096 ierr = MatSetType(A,type);CHKERRQ(ierr); 2097 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2098 2099 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2100 for (i=0; i<m; i++) { 2101 ourlens[i] += offlens[i]; 2102 } 2103 2104 if (!rank) { 2105 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2106 2107 /* read in my part of the matrix numerical values */ 2108 nz = procsnz[0]; 2109 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2110 2111 /* insert into matrix */ 2112 jj = rstart; 2113 smycols = mycols; 2114 svals = vals; 2115 for (i=0; i<m; i++) { 2116 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2117 smycols += ourlens[i]; 2118 svals += ourlens[i]; 2119 jj++; 2120 } 2121 2122 /* read in other processors and ship out */ 2123 for (i=1; i<size; i++) { 2124 nz = procsnz[i]; 2125 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2126 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2127 } 2128 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2129 } else { 2130 /* receive numeric values */ 2131 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2132 2133 /* receive message of values*/ 2134 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2135 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2136 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2137 2138 /* insert into matrix */ 2139 jj = rstart; 2140 smycols = mycols; 2141 svals = vals; 2142 for (i=0; i<m; i++) { 2143 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2144 smycols += ourlens[i]; 2145 svals += ourlens[i]; 2146 jj++; 2147 } 2148 } 2149 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2150 ierr = PetscFree(vals);CHKERRQ(ierr); 2151 ierr = PetscFree(mycols);CHKERRQ(ierr); 2152 ierr = PetscFree(rowners);CHKERRQ(ierr); 2153 2154 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2155 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2156 *newmat = A; 2157 PetscFunctionReturn(0); 2158 } 2159 2160 #undef __FUNCT__ 2161 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2162 /* 2163 Not great since it makes two copies of the submatrix, first an SeqAIJ 2164 in local and then by concatenating the local matrices the end result. 2165 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2166 */ 2167 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2168 { 2169 PetscErrorCode ierr; 2170 PetscMPIInt rank,size; 2171 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2172 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2173 Mat *local,M,Mreuse; 2174 PetscScalar *vwork,*aa; 2175 MPI_Comm comm = mat->comm; 2176 Mat_SeqAIJ *aij; 2177 2178 2179 PetscFunctionBegin; 2180 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2181 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2182 2183 if (call == MAT_REUSE_MATRIX) { 2184 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2185 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2186 local = &Mreuse; 2187 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2188 } else { 2189 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2190 Mreuse = *local; 2191 ierr = PetscFree(local);CHKERRQ(ierr); 2192 } 2193 2194 /* 2195 m - number of local rows 2196 n - number of columns (same on all processors) 2197 rstart - first row in new global matrix generated 2198 */ 2199 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2200 if (call == MAT_INITIAL_MATRIX) { 2201 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2202 ii = aij->i; 2203 jj = aij->j; 2204 2205 /* 2206 Determine the number of non-zeros in the diagonal and off-diagonal 2207 portions of the matrix in order to do correct preallocation 2208 */ 2209 2210 /* first get start and end of "diagonal" columns */ 2211 if (csize == PETSC_DECIDE) { 2212 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2213 if (mglobal == n) { /* square matrix */ 2214 nlocal = m; 2215 } else { 2216 nlocal = n/size + ((n % size) > rank); 2217 } 2218 } else { 2219 nlocal = csize; 2220 } 2221 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2222 rstart = rend - nlocal; 2223 if (rank == size - 1 && rend != n) { 2224 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2225 } 2226 2227 /* next, compute all the lengths */ 2228 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2229 olens = dlens + m; 2230 for (i=0; i<m; i++) { 2231 jend = ii[i+1] - ii[i]; 2232 olen = 0; 2233 dlen = 0; 2234 for (j=0; j<jend; j++) { 2235 if (*jj < rstart || *jj >= rend) olen++; 2236 else dlen++; 2237 jj++; 2238 } 2239 olens[i] = olen; 2240 dlens[i] = dlen; 2241 } 2242 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2243 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 2244 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2245 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2246 ierr = PetscFree(dlens);CHKERRQ(ierr); 2247 } else { 2248 PetscInt ml,nl; 2249 2250 M = *newmat; 2251 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2252 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2253 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2254 /* 2255 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2256 rather than the slower MatSetValues(). 2257 */ 2258 M->was_assembled = PETSC_TRUE; 2259 M->assembled = PETSC_FALSE; 2260 } 2261 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2262 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2263 ii = aij->i; 2264 jj = aij->j; 2265 aa = aij->a; 2266 for (i=0; i<m; i++) { 2267 row = rstart + i; 2268 nz = ii[i+1] - ii[i]; 2269 cwork = jj; jj += nz; 2270 vwork = aa; aa += nz; 2271 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2272 } 2273 2274 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2275 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2276 *newmat = M; 2277 2278 /* save submatrix used in processor for next request */ 2279 if (call == MAT_INITIAL_MATRIX) { 2280 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2281 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2282 } 2283 2284 PetscFunctionReturn(0); 2285 } 2286 2287 EXTERN_C_BEGIN 2288 #undef __FUNCT__ 2289 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2290 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2291 { 2292 PetscInt m,cstart, cend,j,nnz,i,d; 2293 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 2294 const PetscInt *JJ; 2295 PetscScalar *values; 2296 PetscErrorCode ierr; 2297 2298 PetscFunctionBegin; 2299 if (I[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"I[0] must be 0 it is %D",I[0]); 2300 2301 B->rmap.bs = B->cmap.bs = 1; 2302 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 2303 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 2304 m = B->rmap.n; 2305 cstart = B->cmap.rstart; 2306 cend = B->cmap.rend; 2307 rstart = B->rmap.rstart; 2308 2309 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2310 o_nnz = d_nnz + m; 2311 2312 for (i=0; i<m; i++) { 2313 nnz = I[i+1]- I[i]; 2314 JJ = J + I[i]; 2315 nnz_max = PetscMax(nnz_max,nnz); 2316 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 2317 for (j=0; j<nnz; j++) { 2318 if (*JJ >= cstart) break; 2319 JJ++; 2320 } 2321 d = 0; 2322 for (; j<nnz; j++) { 2323 if (*JJ++ >= cend) break; 2324 d++; 2325 } 2326 d_nnz[i] = d; 2327 o_nnz[i] = nnz - d; 2328 } 2329 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2330 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2331 2332 if (v) values = (PetscScalar*)v; 2333 else { 2334 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2335 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2336 } 2337 2338 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2339 for (i=0; i<m; i++) { 2340 ii = i + rstart; 2341 nnz = I[i+1]- I[i]; 2342 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values+(v ? I[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2343 } 2344 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2345 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2346 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2347 2348 if (!v) { 2349 ierr = PetscFree(values);CHKERRQ(ierr); 2350 } 2351 PetscFunctionReturn(0); 2352 } 2353 EXTERN_C_END 2354 2355 #undef __FUNCT__ 2356 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2357 /*@ 2358 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2359 (the default parallel PETSc format). 2360 2361 Collective on MPI_Comm 2362 2363 Input Parameters: 2364 + B - the matrix 2365 . i - the indices into j for the start of each local row (starts with zero) 2366 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2367 - v - optional values in the matrix 2368 2369 Level: developer 2370 2371 .keywords: matrix, aij, compressed row, sparse, parallel 2372 2373 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2374 @*/ 2375 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2376 { 2377 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2378 2379 PetscFunctionBegin; 2380 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2381 if (f) { 2382 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2383 } 2384 PetscFunctionReturn(0); 2385 } 2386 2387 #undef __FUNCT__ 2388 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2389 /*@C 2390 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2391 (the default parallel PETSc format). For good matrix assembly performance 2392 the user should preallocate the matrix storage by setting the parameters 2393 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2394 performance can be increased by more than a factor of 50. 2395 2396 Collective on MPI_Comm 2397 2398 Input Parameters: 2399 + A - the matrix 2400 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2401 (same value is used for all local rows) 2402 . d_nnz - array containing the number of nonzeros in the various rows of the 2403 DIAGONAL portion of the local submatrix (possibly different for each row) 2404 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2405 The size of this array is equal to the number of local rows, i.e 'm'. 2406 You must leave room for the diagonal entry even if it is zero. 2407 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2408 submatrix (same value is used for all local rows). 2409 - o_nnz - array containing the number of nonzeros in the various rows of the 2410 OFF-DIAGONAL portion of the local submatrix (possibly different for 2411 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2412 structure. The size of this array is equal to the number 2413 of local rows, i.e 'm'. 2414 2415 If the *_nnz parameter is given then the *_nz parameter is ignored 2416 2417 The AIJ format (also called the Yale sparse matrix format or 2418 compressed row storage (CSR)), is fully compatible with standard Fortran 77 2419 storage. The stored row and column indices begin with zero. See the users manual for details. 2420 2421 The parallel matrix is partitioned such that the first m0 rows belong to 2422 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2423 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2424 2425 The DIAGONAL portion of the local submatrix of a processor can be defined 2426 as the submatrix which is obtained by extraction the part corresponding 2427 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2428 first row that belongs to the processor, and r2 is the last row belonging 2429 to the this processor. This is a square mxm matrix. The remaining portion 2430 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2431 2432 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2433 2434 Example usage: 2435 2436 Consider the following 8x8 matrix with 34 non-zero values, that is 2437 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2438 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2439 as follows: 2440 2441 .vb 2442 1 2 0 | 0 3 0 | 0 4 2443 Proc0 0 5 6 | 7 0 0 | 8 0 2444 9 0 10 | 11 0 0 | 12 0 2445 ------------------------------------- 2446 13 0 14 | 15 16 17 | 0 0 2447 Proc1 0 18 0 | 19 20 21 | 0 0 2448 0 0 0 | 22 23 0 | 24 0 2449 ------------------------------------- 2450 Proc2 25 26 27 | 0 0 28 | 29 0 2451 30 0 0 | 31 32 33 | 0 34 2452 .ve 2453 2454 This can be represented as a collection of submatrices as: 2455 2456 .vb 2457 A B C 2458 D E F 2459 G H I 2460 .ve 2461 2462 Where the submatrices A,B,C are owned by proc0, D,E,F are 2463 owned by proc1, G,H,I are owned by proc2. 2464 2465 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2466 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2467 The 'M','N' parameters are 8,8, and have the same values on all procs. 2468 2469 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2470 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2471 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2472 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2473 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2474 matrix, ans [DF] as another SeqAIJ matrix. 2475 2476 When d_nz, o_nz parameters are specified, d_nz storage elements are 2477 allocated for every row of the local diagonal submatrix, and o_nz 2478 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2479 One way to choose d_nz and o_nz is to use the max nonzerors per local 2480 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2481 In this case, the values of d_nz,o_nz are: 2482 .vb 2483 proc0 : dnz = 2, o_nz = 2 2484 proc1 : dnz = 3, o_nz = 2 2485 proc2 : dnz = 1, o_nz = 4 2486 .ve 2487 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2488 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2489 for proc3. i.e we are using 12+15+10=37 storage locations to store 2490 34 values. 2491 2492 When d_nnz, o_nnz parameters are specified, the storage is specified 2493 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2494 In the above case the values for d_nnz,o_nnz are: 2495 .vb 2496 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2497 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2498 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2499 .ve 2500 Here the space allocated is sum of all the above values i.e 34, and 2501 hence pre-allocation is perfect. 2502 2503 Level: intermediate 2504 2505 .keywords: matrix, aij, compressed row, sparse, parallel 2506 2507 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 2508 MPIAIJ 2509 @*/ 2510 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2511 { 2512 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2513 2514 PetscFunctionBegin; 2515 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2516 if (f) { 2517 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2518 } 2519 PetscFunctionReturn(0); 2520 } 2521 2522 #undef __FUNCT__ 2523 #define __FUNCT__ "MatCreateMPIAIJ" 2524 /*@C 2525 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2526 (the default parallel PETSc format). For good matrix assembly performance 2527 the user should preallocate the matrix storage by setting the parameters 2528 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2529 performance can be increased by more than a factor of 50. 2530 2531 Collective on MPI_Comm 2532 2533 Input Parameters: 2534 + comm - MPI communicator 2535 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2536 This value should be the same as the local size used in creating the 2537 y vector for the matrix-vector product y = Ax. 2538 . n - This value should be the same as the local size used in creating the 2539 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2540 calculated if N is given) For square matrices n is almost always m. 2541 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2542 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2543 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2544 (same value is used for all local rows) 2545 . d_nnz - array containing the number of nonzeros in the various rows of the 2546 DIAGONAL portion of the local submatrix (possibly different for each row) 2547 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2548 The size of this array is equal to the number of local rows, i.e 'm'. 2549 You must leave room for the diagonal entry even if it is zero. 2550 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2551 submatrix (same value is used for all local rows). 2552 - o_nnz - array containing the number of nonzeros in the various rows of the 2553 OFF-DIAGONAL portion of the local submatrix (possibly different for 2554 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2555 structure. The size of this array is equal to the number 2556 of local rows, i.e 'm'. 2557 2558 Output Parameter: 2559 . A - the matrix 2560 2561 Notes: 2562 If the *_nnz parameter is given then the *_nz parameter is ignored 2563 2564 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2565 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2566 storage requirements for this matrix. 2567 2568 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2569 processor than it must be used on all processors that share the object for 2570 that argument. 2571 2572 The user MUST specify either the local or global matrix dimensions 2573 (possibly both). 2574 2575 The parallel matrix is partitioned such that the first m0 rows belong to 2576 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2577 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2578 2579 The DIAGONAL portion of the local submatrix of a processor can be defined 2580 as the submatrix which is obtained by extraction the part corresponding 2581 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2582 first row that belongs to the processor, and r2 is the last row belonging 2583 to the this processor. This is a square mxm matrix. The remaining portion 2584 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2585 2586 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2587 2588 When calling this routine with a single process communicator, a matrix of 2589 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2590 type of communicator, use the construction mechanism: 2591 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 2592 2593 By default, this format uses inodes (identical nodes) when possible. 2594 We search for consecutive rows with the same nonzero structure, thereby 2595 reusing matrix information to achieve increased efficiency. 2596 2597 Options Database Keys: 2598 + -mat_no_inode - Do not use inodes 2599 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 2600 - -mat_aij_oneindex - Internally use indexing starting at 1 2601 rather than 0. Note that when calling MatSetValues(), 2602 the user still MUST index entries starting at 0! 2603 2604 2605 Example usage: 2606 2607 Consider the following 8x8 matrix with 34 non-zero values, that is 2608 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2609 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2610 as follows: 2611 2612 .vb 2613 1 2 0 | 0 3 0 | 0 4 2614 Proc0 0 5 6 | 7 0 0 | 8 0 2615 9 0 10 | 11 0 0 | 12 0 2616 ------------------------------------- 2617 13 0 14 | 15 16 17 | 0 0 2618 Proc1 0 18 0 | 19 20 21 | 0 0 2619 0 0 0 | 22 23 0 | 24 0 2620 ------------------------------------- 2621 Proc2 25 26 27 | 0 0 28 | 29 0 2622 30 0 0 | 31 32 33 | 0 34 2623 .ve 2624 2625 This can be represented as a collection of submatrices as: 2626 2627 .vb 2628 A B C 2629 D E F 2630 G H I 2631 .ve 2632 2633 Where the submatrices A,B,C are owned by proc0, D,E,F are 2634 owned by proc1, G,H,I are owned by proc2. 2635 2636 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2637 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2638 The 'M','N' parameters are 8,8, and have the same values on all procs. 2639 2640 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2641 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2642 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2643 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2644 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2645 matrix, ans [DF] as another SeqAIJ matrix. 2646 2647 When d_nz, o_nz parameters are specified, d_nz storage elements are 2648 allocated for every row of the local diagonal submatrix, and o_nz 2649 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2650 One way to choose d_nz and o_nz is to use the max nonzerors per local 2651 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2652 In this case, the values of d_nz,o_nz are: 2653 .vb 2654 proc0 : dnz = 2, o_nz = 2 2655 proc1 : dnz = 3, o_nz = 2 2656 proc2 : dnz = 1, o_nz = 4 2657 .ve 2658 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2659 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2660 for proc3. i.e we are using 12+15+10=37 storage locations to store 2661 34 values. 2662 2663 When d_nnz, o_nnz parameters are specified, the storage is specified 2664 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2665 In the above case the values for d_nnz,o_nnz are: 2666 .vb 2667 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2668 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2669 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2670 .ve 2671 Here the space allocated is sum of all the above values i.e 34, and 2672 hence pre-allocation is perfect. 2673 2674 Level: intermediate 2675 2676 .keywords: matrix, aij, compressed row, sparse, parallel 2677 2678 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2679 MPIAIJ 2680 @*/ 2681 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2682 { 2683 PetscErrorCode ierr; 2684 PetscMPIInt size; 2685 2686 PetscFunctionBegin; 2687 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2688 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2689 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2690 if (size > 1) { 2691 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2692 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2693 } else { 2694 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2695 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2696 } 2697 PetscFunctionReturn(0); 2698 } 2699 2700 #undef __FUNCT__ 2701 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2702 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2703 { 2704 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2705 2706 PetscFunctionBegin; 2707 *Ad = a->A; 2708 *Ao = a->B; 2709 *colmap = a->garray; 2710 PetscFunctionReturn(0); 2711 } 2712 2713 #undef __FUNCT__ 2714 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2715 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2716 { 2717 PetscErrorCode ierr; 2718 PetscInt i; 2719 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2720 2721 PetscFunctionBegin; 2722 if (coloring->ctype == IS_COLORING_LOCAL) { 2723 ISColoringValue *allcolors,*colors; 2724 ISColoring ocoloring; 2725 2726 /* set coloring for diagonal portion */ 2727 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2728 2729 /* set coloring for off-diagonal portion */ 2730 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2731 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2732 for (i=0; i<a->B->cmap.n; i++) { 2733 colors[i] = allcolors[a->garray[i]]; 2734 } 2735 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2736 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->cmap.n,coloring->n,colors,&ocoloring);CHKERRQ(ierr); 2737 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2738 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2739 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2740 ISColoringValue *colors; 2741 PetscInt *larray; 2742 ISColoring ocoloring; 2743 2744 /* set coloring for diagonal portion */ 2745 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2746 for (i=0; i<a->A->cmap.n; i++) { 2747 larray[i] = i + A->cmap.rstart; 2748 } 2749 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2750 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2751 for (i=0; i<a->A->cmap.n; i++) { 2752 colors[i] = coloring->colors[larray[i]]; 2753 } 2754 ierr = PetscFree(larray);CHKERRQ(ierr); 2755 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->cmap.n,coloring->n,colors,&ocoloring);CHKERRQ(ierr); 2756 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2757 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2758 2759 /* set coloring for off-diagonal portion */ 2760 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2761 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2762 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2763 for (i=0; i<a->B->cmap.n; i++) { 2764 colors[i] = coloring->colors[larray[i]]; 2765 } 2766 ierr = PetscFree(larray);CHKERRQ(ierr); 2767 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->cmap.n,coloring->n,colors,&ocoloring);CHKERRQ(ierr); 2768 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2769 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2770 } else { 2771 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 2772 } 2773 2774 PetscFunctionReturn(0); 2775 } 2776 2777 #if defined(PETSC_HAVE_ADIC) 2778 #undef __FUNCT__ 2779 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2780 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2781 { 2782 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2783 PetscErrorCode ierr; 2784 2785 PetscFunctionBegin; 2786 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2787 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2788 PetscFunctionReturn(0); 2789 } 2790 #endif 2791 2792 #undef __FUNCT__ 2793 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2794 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 2795 { 2796 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2797 PetscErrorCode ierr; 2798 2799 PetscFunctionBegin; 2800 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2801 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2802 PetscFunctionReturn(0); 2803 } 2804 2805 #undef __FUNCT__ 2806 #define __FUNCT__ "MatMerge" 2807 /*@C 2808 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2809 matrices from each processor 2810 2811 Collective on MPI_Comm 2812 2813 Input Parameters: 2814 + comm - the communicators the parallel matrix will live on 2815 . inmat - the input sequential matrices 2816 . n - number of local columns (or PETSC_DECIDE) 2817 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2818 2819 Output Parameter: 2820 . outmat - the parallel matrix generated 2821 2822 Level: advanced 2823 2824 Notes: The number of columns of the matrix in EACH processor MUST be the same. 2825 2826 @*/ 2827 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2828 { 2829 PetscErrorCode ierr; 2830 PetscInt m,N,i,rstart,nnz,I,*dnz,*onz; 2831 PetscInt *indx; 2832 PetscScalar *values; 2833 2834 PetscFunctionBegin; 2835 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2836 if (scall == MAT_INITIAL_MATRIX){ 2837 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2838 if (n == PETSC_DECIDE){ 2839 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 2840 } 2841 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2842 rstart -= m; 2843 2844 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2845 for (i=0;i<m;i++) { 2846 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2847 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2848 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2849 } 2850 /* This routine will ONLY return MPIAIJ type matrix */ 2851 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 2852 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2853 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2854 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2855 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2856 2857 } else if (scall == MAT_REUSE_MATRIX){ 2858 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2859 } else { 2860 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 2861 } 2862 2863 for (i=0;i<m;i++) { 2864 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2865 I = i + rstart; 2866 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2867 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2868 } 2869 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2870 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2871 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2872 2873 PetscFunctionReturn(0); 2874 } 2875 2876 #undef __FUNCT__ 2877 #define __FUNCT__ "MatFileSplit" 2878 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2879 { 2880 PetscErrorCode ierr; 2881 PetscMPIInt rank; 2882 PetscInt m,N,i,rstart,nnz; 2883 size_t len; 2884 const PetscInt *indx; 2885 PetscViewer out; 2886 char *name; 2887 Mat B; 2888 const PetscScalar *values; 2889 2890 PetscFunctionBegin; 2891 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2892 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2893 /* Should this be the type of the diagonal block of A? */ 2894 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2895 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 2896 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2897 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2898 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2899 for (i=0;i<m;i++) { 2900 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2901 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2902 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2903 } 2904 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2905 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2906 2907 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2908 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2909 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2910 sprintf(name,"%s.%d",outfile,rank); 2911 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 2912 ierr = PetscFree(name); 2913 ierr = MatView(B,out);CHKERRQ(ierr); 2914 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2915 ierr = MatDestroy(B);CHKERRQ(ierr); 2916 PetscFunctionReturn(0); 2917 } 2918 2919 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 2920 #undef __FUNCT__ 2921 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 2922 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 2923 { 2924 PetscErrorCode ierr; 2925 Mat_Merge_SeqsToMPI *merge; 2926 PetscObjectContainer container; 2927 2928 PetscFunctionBegin; 2929 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2930 if (container) { 2931 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2932 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 2933 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 2934 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 2935 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 2936 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 2937 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 2938 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 2939 if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);} 2940 if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);} 2941 if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);} 2942 2943 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 2944 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 2945 } 2946 ierr = PetscFree(merge);CHKERRQ(ierr); 2947 2948 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 2949 PetscFunctionReturn(0); 2950 } 2951 2952 #include "src/mat/utils/freespace.h" 2953 #include "petscbt.h" 2954 static PetscEvent logkey_seqstompinum = 0; 2955 #undef __FUNCT__ 2956 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 2957 /*@C 2958 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 2959 matrices from each processor 2960 2961 Collective on MPI_Comm 2962 2963 Input Parameters: 2964 + comm - the communicators the parallel matrix will live on 2965 . seqmat - the input sequential matrices 2966 . m - number of local rows (or PETSC_DECIDE) 2967 . n - number of local columns (or PETSC_DECIDE) 2968 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2969 2970 Output Parameter: 2971 . mpimat - the parallel matrix generated 2972 2973 Level: advanced 2974 2975 Notes: 2976 The dimensions of the sequential matrix in each processor MUST be the same. 2977 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 2978 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 2979 @*/ 2980 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 2981 { 2982 PetscErrorCode ierr; 2983 MPI_Comm comm=mpimat->comm; 2984 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2985 PetscMPIInt size,rank,taga,*len_s; 2986 PetscInt N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j; 2987 PetscInt proc,m; 2988 PetscInt **buf_ri,**buf_rj; 2989 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 2990 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 2991 MPI_Request *s_waits,*r_waits; 2992 MPI_Status *status; 2993 MatScalar *aa=a->a,**abuf_r,*ba_i; 2994 Mat_Merge_SeqsToMPI *merge; 2995 PetscObjectContainer container; 2996 2997 PetscFunctionBegin; 2998 if (!logkey_seqstompinum) { 2999 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 3000 } 3001 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3002 3003 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3004 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3005 3006 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3007 if (container) { 3008 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3009 } 3010 bi = merge->bi; 3011 bj = merge->bj; 3012 buf_ri = merge->buf_ri; 3013 buf_rj = merge->buf_rj; 3014 3015 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3016 owners = merge->rowmap.range; 3017 len_s = merge->len_s; 3018 3019 /* send and recv matrix values */ 3020 /*-----------------------------*/ 3021 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3022 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3023 3024 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 3025 for (proc=0,k=0; proc<size; proc++){ 3026 if (!len_s[proc]) continue; 3027 i = owners[proc]; 3028 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 3029 k++; 3030 } 3031 3032 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 3033 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 3034 ierr = PetscFree(status);CHKERRQ(ierr); 3035 3036 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3037 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3038 3039 /* insert mat values of mpimat */ 3040 /*----------------------------*/ 3041 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 3042 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3043 nextrow = buf_ri_k + merge->nrecv; 3044 nextai = nextrow + merge->nrecv; 3045 3046 for (k=0; k<merge->nrecv; k++){ 3047 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3048 nrows = *(buf_ri_k[k]); 3049 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 3050 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3051 } 3052 3053 /* set values of ba */ 3054 m = merge->rowmap.n; 3055 for (i=0; i<m; i++) { 3056 arow = owners[rank] + i; 3057 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 3058 bnzi = bi[i+1] - bi[i]; 3059 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 3060 3061 /* add local non-zero vals of this proc's seqmat into ba */ 3062 anzi = ai[arow+1] - ai[arow]; 3063 aj = a->j + ai[arow]; 3064 aa = a->a + ai[arow]; 3065 nextaj = 0; 3066 for (j=0; nextaj<anzi; j++){ 3067 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3068 ba_i[j] += aa[nextaj++]; 3069 } 3070 } 3071 3072 /* add received vals into ba */ 3073 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3074 /* i-th row */ 3075 if (i == *nextrow[k]) { 3076 anzi = *(nextai[k]+1) - *nextai[k]; 3077 aj = buf_rj[k] + *(nextai[k]); 3078 aa = abuf_r[k] + *(nextai[k]); 3079 nextaj = 0; 3080 for (j=0; nextaj<anzi; j++){ 3081 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3082 ba_i[j] += aa[nextaj++]; 3083 } 3084 } 3085 nextrow[k]++; nextai[k]++; 3086 } 3087 } 3088 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3089 } 3090 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3091 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3092 3093 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3094 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3095 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3096 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3097 PetscFunctionReturn(0); 3098 } 3099 3100 static PetscEvent logkey_seqstompisym = 0; 3101 #undef __FUNCT__ 3102 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 3103 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3104 { 3105 PetscErrorCode ierr; 3106 Mat B_mpi; 3107 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3108 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3109 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3110 PetscInt M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j; 3111 PetscInt len,proc,*dnz,*onz; 3112 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3113 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3114 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3115 MPI_Status *status; 3116 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3117 PetscBT lnkbt; 3118 Mat_Merge_SeqsToMPI *merge; 3119 PetscObjectContainer container; 3120 3121 PetscFunctionBegin; 3122 if (!logkey_seqstompisym) { 3123 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3124 } 3125 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3126 3127 /* make sure it is a PETSc comm */ 3128 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 3129 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3130 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3131 3132 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3133 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3134 3135 /* determine row ownership */ 3136 /*---------------------------------------------------------*/ 3137 merge->rowmap.n = m; 3138 merge->rowmap.N = M; 3139 ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 3140 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3141 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3142 3143 m = merge->rowmap.n; 3144 M = merge->rowmap.N; 3145 owners = merge->rowmap.range; 3146 3147 /* determine the number of messages to send, their lengths */ 3148 /*---------------------------------------------------------*/ 3149 len_s = merge->len_s; 3150 3151 len = 0; /* length of buf_si[] */ 3152 merge->nsend = 0; 3153 for (proc=0; proc<size; proc++){ 3154 len_si[proc] = 0; 3155 if (proc == rank){ 3156 len_s[proc] = 0; 3157 } else { 3158 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3159 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3160 } 3161 if (len_s[proc]) { 3162 merge->nsend++; 3163 nrows = 0; 3164 for (i=owners[proc]; i<owners[proc+1]; i++){ 3165 if (ai[i+1] > ai[i]) nrows++; 3166 } 3167 len_si[proc] = 2*(nrows+1); 3168 len += len_si[proc]; 3169 } 3170 } 3171 3172 /* determine the number and length of messages to receive for ij-structure */ 3173 /*-------------------------------------------------------------------------*/ 3174 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3175 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3176 3177 /* post the Irecv of j-structure */ 3178 /*-------------------------------*/ 3179 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&tagj);CHKERRQ(ierr); 3180 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3181 3182 /* post the Isend of j-structure */ 3183 /*--------------------------------*/ 3184 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3185 sj_waits = si_waits + merge->nsend; 3186 3187 for (proc=0, k=0; proc<size; proc++){ 3188 if (!len_s[proc]) continue; 3189 i = owners[proc]; 3190 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3191 k++; 3192 } 3193 3194 /* receives and sends of j-structure are complete */ 3195 /*------------------------------------------------*/ 3196 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 3197 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 3198 3199 /* send and recv i-structure */ 3200 /*---------------------------*/ 3201 ierr = PetscObjectGetNewTag((PetscObject)merge,&tagi);CHKERRQ(ierr); 3202 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3203 3204 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3205 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3206 for (proc=0,k=0; proc<size; proc++){ 3207 if (!len_s[proc]) continue; 3208 /* form outgoing message for i-structure: 3209 buf_si[0]: nrows to be sent 3210 [1:nrows]: row index (global) 3211 [nrows+1:2*nrows+1]: i-structure index 3212 */ 3213 /*-------------------------------------------*/ 3214 nrows = len_si[proc]/2 - 1; 3215 buf_si_i = buf_si + nrows+1; 3216 buf_si[0] = nrows; 3217 buf_si_i[0] = 0; 3218 nrows = 0; 3219 for (i=owners[proc]; i<owners[proc+1]; i++){ 3220 anzi = ai[i+1] - ai[i]; 3221 if (anzi) { 3222 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3223 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3224 nrows++; 3225 } 3226 } 3227 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3228 k++; 3229 buf_si += len_si[proc]; 3230 } 3231 3232 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 3233 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 3234 3235 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 3236 for (i=0; i<merge->nrecv; i++){ 3237 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 3238 } 3239 3240 ierr = PetscFree(len_si);CHKERRQ(ierr); 3241 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3242 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 3243 ierr = PetscFree(si_waits);CHKERRQ(ierr); 3244 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3245 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3246 ierr = PetscFree(status);CHKERRQ(ierr); 3247 3248 /* compute a local seq matrix in each processor */ 3249 /*----------------------------------------------*/ 3250 /* allocate bi array and free space for accumulating nonzero column info */ 3251 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3252 bi[0] = 0; 3253 3254 /* create and initialize a linked list */ 3255 nlnk = N+1; 3256 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3257 3258 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3259 len = 0; 3260 len = ai[owners[rank+1]] - ai[owners[rank]]; 3261 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3262 current_space = free_space; 3263 3264 /* determine symbolic info for each local row */ 3265 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3266 nextrow = buf_ri_k + merge->nrecv; 3267 nextai = nextrow + merge->nrecv; 3268 for (k=0; k<merge->nrecv; k++){ 3269 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3270 nrows = *buf_ri_k[k]; 3271 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 3272 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3273 } 3274 3275 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3276 len = 0; 3277 for (i=0;i<m;i++) { 3278 bnzi = 0; 3279 /* add local non-zero cols of this proc's seqmat into lnk */ 3280 arow = owners[rank] + i; 3281 anzi = ai[arow+1] - ai[arow]; 3282 aj = a->j + ai[arow]; 3283 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3284 bnzi += nlnk; 3285 /* add received col data into lnk */ 3286 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3287 if (i == *nextrow[k]) { /* i-th row */ 3288 anzi = *(nextai[k]+1) - *nextai[k]; 3289 aj = buf_rj[k] + *nextai[k]; 3290 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3291 bnzi += nlnk; 3292 nextrow[k]++; nextai[k]++; 3293 } 3294 } 3295 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3296 3297 /* if free space is not available, make more free space */ 3298 if (current_space->local_remaining<bnzi) { 3299 ierr = PetscFreeSpaceGet(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3300 nspacedouble++; 3301 } 3302 /* copy data into free space, then initialize lnk */ 3303 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3304 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3305 3306 current_space->array += bnzi; 3307 current_space->local_used += bnzi; 3308 current_space->local_remaining -= bnzi; 3309 3310 bi[i+1] = bi[i] + bnzi; 3311 } 3312 3313 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3314 3315 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3316 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3317 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3318 3319 /* create symbolic parallel matrix B_mpi */ 3320 /*---------------------------------------*/ 3321 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 3322 if (n==PETSC_DECIDE) { 3323 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 3324 } else { 3325 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3326 } 3327 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3328 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3329 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3330 3331 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3332 B_mpi->assembled = PETSC_FALSE; 3333 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3334 merge->bi = bi; 3335 merge->bj = bj; 3336 merge->buf_ri = buf_ri; 3337 merge->buf_rj = buf_rj; 3338 merge->coi = PETSC_NULL; 3339 merge->coj = PETSC_NULL; 3340 merge->owners_co = PETSC_NULL; 3341 3342 /* attach the supporting struct to B_mpi for reuse */ 3343 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3344 ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr); 3345 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3346 *mpimat = B_mpi; 3347 3348 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 3349 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3350 PetscFunctionReturn(0); 3351 } 3352 3353 static PetscEvent logkey_seqstompi = 0; 3354 #undef __FUNCT__ 3355 #define __FUNCT__ "MatMerge_SeqsToMPI" 3356 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3357 { 3358 PetscErrorCode ierr; 3359 3360 PetscFunctionBegin; 3361 if (!logkey_seqstompi) { 3362 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3363 } 3364 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3365 if (scall == MAT_INITIAL_MATRIX){ 3366 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3367 } 3368 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3369 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3370 PetscFunctionReturn(0); 3371 } 3372 static PetscEvent logkey_getlocalmat = 0; 3373 #undef __FUNCT__ 3374 #define __FUNCT__ "MatGetLocalMat" 3375 /*@C 3376 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3377 3378 Not Collective 3379 3380 Input Parameters: 3381 + A - the matrix 3382 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3383 3384 Output Parameter: 3385 . A_loc - the local sequential matrix generated 3386 3387 Level: developer 3388 3389 @*/ 3390 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3391 { 3392 PetscErrorCode ierr; 3393 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3394 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3395 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3396 PetscScalar *aa=a->a,*ba=b->a,*ca; 3397 PetscInt am=A->rmap.n,i,j,k,cstart=A->cmap.rstart; 3398 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3399 3400 PetscFunctionBegin; 3401 if (!logkey_getlocalmat) { 3402 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3403 } 3404 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3405 if (scall == MAT_INITIAL_MATRIX){ 3406 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 3407 ci[0] = 0; 3408 for (i=0; i<am; i++){ 3409 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3410 } 3411 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3412 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 3413 k = 0; 3414 for (i=0; i<am; i++) { 3415 ncols_o = bi[i+1] - bi[i]; 3416 ncols_d = ai[i+1] - ai[i]; 3417 /* off-diagonal portion of A */ 3418 for (jo=0; jo<ncols_o; jo++) { 3419 col = cmap[*bj]; 3420 if (col >= cstart) break; 3421 cj[k] = col; bj++; 3422 ca[k++] = *ba++; 3423 } 3424 /* diagonal portion of A */ 3425 for (j=0; j<ncols_d; j++) { 3426 cj[k] = cstart + *aj++; 3427 ca[k++] = *aa++; 3428 } 3429 /* off-diagonal portion of A */ 3430 for (j=jo; j<ncols_o; j++) { 3431 cj[k] = cmap[*bj++]; 3432 ca[k++] = *ba++; 3433 } 3434 } 3435 /* put together the new matrix */ 3436 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr); 3437 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3438 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3439 mat = (Mat_SeqAIJ*)(*A_loc)->data; 3440 mat->freedata = PETSC_TRUE; 3441 mat->nonew = 0; 3442 } else if (scall == MAT_REUSE_MATRIX){ 3443 mat=(Mat_SeqAIJ*)(*A_loc)->data; 3444 ci = mat->i; cj = mat->j; ca = mat->a; 3445 for (i=0; i<am; i++) { 3446 /* off-diagonal portion of A */ 3447 ncols_o = bi[i+1] - bi[i]; 3448 for (jo=0; jo<ncols_o; jo++) { 3449 col = cmap[*bj]; 3450 if (col >= cstart) break; 3451 *ca++ = *ba++; bj++; 3452 } 3453 /* diagonal portion of A */ 3454 ncols_d = ai[i+1] - ai[i]; 3455 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 3456 /* off-diagonal portion of A */ 3457 for (j=jo; j<ncols_o; j++) { 3458 *ca++ = *ba++; bj++; 3459 } 3460 } 3461 } else { 3462 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3463 } 3464 3465 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3466 PetscFunctionReturn(0); 3467 } 3468 3469 static PetscEvent logkey_getlocalmatcondensed = 0; 3470 #undef __FUNCT__ 3471 #define __FUNCT__ "MatGetLocalMatCondensed" 3472 /*@C 3473 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 3474 3475 Not Collective 3476 3477 Input Parameters: 3478 + A - the matrix 3479 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3480 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 3481 3482 Output Parameter: 3483 . A_loc - the local sequential matrix generated 3484 3485 Level: developer 3486 3487 @*/ 3488 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 3489 { 3490 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3491 PetscErrorCode ierr; 3492 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 3493 IS isrowa,iscola; 3494 Mat *aloc; 3495 3496 PetscFunctionBegin; 3497 if (!logkey_getlocalmatcondensed) { 3498 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE); 3499 } 3500 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3501 if (!row){ 3502 start = A->rmap.rstart; end = A->rmap.rend; 3503 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 3504 } else { 3505 isrowa = *row; 3506 } 3507 if (!col){ 3508 start = A->cmap.rstart; 3509 cmap = a->garray; 3510 nzA = a->A->cmap.n; 3511 nzB = a->B->cmap.n; 3512 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3513 ncols = 0; 3514 for (i=0; i<nzB; i++) { 3515 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3516 else break; 3517 } 3518 imark = i; 3519 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 3520 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 3521 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 3522 ierr = PetscFree(idx);CHKERRQ(ierr); 3523 } else { 3524 iscola = *col; 3525 } 3526 if (scall != MAT_INITIAL_MATRIX){ 3527 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 3528 aloc[0] = *A_loc; 3529 } 3530 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 3531 *A_loc = aloc[0]; 3532 ierr = PetscFree(aloc);CHKERRQ(ierr); 3533 if (!row){ 3534 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 3535 } 3536 if (!col){ 3537 ierr = ISDestroy(iscola);CHKERRQ(ierr); 3538 } 3539 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3540 PetscFunctionReturn(0); 3541 } 3542 3543 static PetscEvent logkey_GetBrowsOfAcols = 0; 3544 #undef __FUNCT__ 3545 #define __FUNCT__ "MatGetBrowsOfAcols" 3546 /*@C 3547 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 3548 3549 Collective on Mat 3550 3551 Input Parameters: 3552 + A,B - the matrices in mpiaij format 3553 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3554 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 3555 3556 Output Parameter: 3557 + rowb, colb - index sets of rows and columns of B to extract 3558 . brstart - row index of B_seq from which next B->rmap.n rows are taken from B's local rows 3559 - B_seq - the sequential matrix generated 3560 3561 Level: developer 3562 3563 @*/ 3564 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 3565 { 3566 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3567 PetscErrorCode ierr; 3568 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 3569 IS isrowb,iscolb; 3570 Mat *bseq; 3571 3572 PetscFunctionBegin; 3573 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 3574 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 3575 } 3576 if (!logkey_GetBrowsOfAcols) { 3577 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 3578 } 3579 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3580 3581 if (scall == MAT_INITIAL_MATRIX){ 3582 start = A->cmap.rstart; 3583 cmap = a->garray; 3584 nzA = a->A->cmap.n; 3585 nzB = a->B->cmap.n; 3586 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3587 ncols = 0; 3588 for (i=0; i<nzB; i++) { /* row < local row index */ 3589 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3590 else break; 3591 } 3592 imark = i; 3593 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 3594 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 3595 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 3596 ierr = PetscFree(idx);CHKERRQ(ierr); 3597 *brstart = imark; 3598 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr); 3599 } else { 3600 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 3601 isrowb = *rowb; iscolb = *colb; 3602 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 3603 bseq[0] = *B_seq; 3604 } 3605 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 3606 *B_seq = bseq[0]; 3607 ierr = PetscFree(bseq);CHKERRQ(ierr); 3608 if (!rowb){ 3609 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 3610 } else { 3611 *rowb = isrowb; 3612 } 3613 if (!colb){ 3614 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 3615 } else { 3616 *colb = iscolb; 3617 } 3618 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3619 PetscFunctionReturn(0); 3620 } 3621 3622 static PetscEvent logkey_GetBrowsOfAocols = 0; 3623 #undef __FUNCT__ 3624 #define __FUNCT__ "MatGetBrowsOfAoCols" 3625 /*@C 3626 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 3627 of the OFF-DIAGONAL portion of local A 3628 3629 Collective on Mat 3630 3631 Input Parameters: 3632 + A,B - the matrices in mpiaij format 3633 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3634 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 3635 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 3636 3637 Output Parameter: 3638 + B_oth - the sequential matrix generated 3639 3640 Level: developer 3641 3642 @*/ 3643 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 3644 { 3645 VecScatter_MPI_General *gen_to,*gen_from; 3646 PetscErrorCode ierr; 3647 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3648 Mat_SeqAIJ *b_oth; 3649 VecScatter ctx=a->Mvctx; 3650 MPI_Comm comm=ctx->comm; 3651 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank; 3652 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj; 3653 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 3654 PetscInt i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 3655 MPI_Request *rwaits,*swaits; 3656 MPI_Status *sstatus,rstatus; 3657 PetscInt *cols; 3658 PetscScalar *vals; 3659 PetscMPIInt j; 3660 3661 PetscFunctionBegin; 3662 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 3663 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 3664 } 3665 if (!logkey_GetBrowsOfAocols) { 3666 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE); 3667 } 3668 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3669 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3670 3671 gen_to = (VecScatter_MPI_General*)ctx->todata; 3672 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 3673 rvalues = gen_from->values; /* holds the length of sending row */ 3674 svalues = gen_to->values; /* holds the length of receiving row */ 3675 nrecvs = gen_from->n; 3676 nsends = gen_to->n; 3677 rwaits = gen_from->requests; 3678 swaits = gen_to->requests; 3679 srow = gen_to->indices; /* local row index to be sent */ 3680 rstarts = gen_from->starts; 3681 sstarts = gen_to->starts; 3682 rprocs = gen_from->procs; 3683 sprocs = gen_to->procs; 3684 sstatus = gen_to->sstatus; 3685 3686 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 3687 if (scall == MAT_INITIAL_MATRIX){ 3688 /* i-array */ 3689 /*---------*/ 3690 /* post receives */ 3691 for (i=0; i<nrecvs; i++){ 3692 rowlen = (PetscInt*)rvalues + rstarts[i]; 3693 nrows = rstarts[i+1]-rstarts[i]; 3694 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3695 } 3696 3697 /* pack the outgoing message */ 3698 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 3699 rstartsj = sstartsj + nsends +1; 3700 sstartsj[0] = 0; rstartsj[0] = 0; 3701 len = 0; /* total length of j or a array to be sent */ 3702 k = 0; 3703 for (i=0; i<nsends; i++){ 3704 rowlen = (PetscInt*)svalues + sstarts[i]; 3705 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3706 for (j=0; j<nrows; j++) { 3707 row = srow[k] + B->rmap.range[rank]; /* global row idx */ 3708 ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 3709 len += rowlen[j]; 3710 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 3711 k++; 3712 } 3713 ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3714 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 3715 } 3716 /* recvs and sends of i-array are completed */ 3717 i = nrecvs; 3718 while (i--) { 3719 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3720 } 3721 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3722 /* allocate buffers for sending j and a arrays */ 3723 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 3724 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 3725 3726 /* create i-array of B_oth */ 3727 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 3728 b_othi[0] = 0; 3729 len = 0; /* total length of j or a array to be received */ 3730 k = 0; 3731 for (i=0; i<nrecvs; i++){ 3732 rowlen = (PetscInt*)rvalues + rstarts[i]; 3733 nrows = rstarts[i+1]-rstarts[i]; 3734 for (j=0; j<nrows; j++) { 3735 b_othi[k+1] = b_othi[k] + rowlen[j]; 3736 len += rowlen[j]; k++; 3737 } 3738 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 3739 } 3740 3741 /* allocate space for j and a arrrays of B_oth */ 3742 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 3743 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr); 3744 3745 /* j-array */ 3746 /*---------*/ 3747 /* post receives of j-array */ 3748 for (i=0; i<nrecvs; i++){ 3749 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3750 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3751 } 3752 k = 0; 3753 for (i=0; i<nsends; i++){ 3754 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3755 bufJ = bufj+sstartsj[i]; 3756 for (j=0; j<nrows; j++) { 3757 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 3758 ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3759 for (l=0; l<ncols; l++){ 3760 *bufJ++ = cols[l]; 3761 } 3762 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3763 } 3764 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3765 } 3766 3767 /* recvs and sends of j-array are completed */ 3768 i = nrecvs; 3769 while (i--) { 3770 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3771 } 3772 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3773 } else if (scall == MAT_REUSE_MATRIX){ 3774 sstartsj = *startsj; 3775 rstartsj = sstartsj + nsends +1; 3776 bufa = *bufa_ptr; 3777 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 3778 b_otha = b_oth->a; 3779 } else { 3780 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 3781 } 3782 3783 /* a-array */ 3784 /*---------*/ 3785 /* post receives of a-array */ 3786 for (i=0; i<nrecvs; i++){ 3787 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3788 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3789 } 3790 k = 0; 3791 for (i=0; i<nsends; i++){ 3792 nrows = sstarts[i+1]-sstarts[i]; 3793 bufA = bufa+sstartsj[i]; 3794 for (j=0; j<nrows; j++) { 3795 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 3796 ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3797 for (l=0; l<ncols; l++){ 3798 *bufA++ = vals[l]; 3799 } 3800 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3801 3802 } 3803 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3804 } 3805 /* recvs and sends of a-array are completed */ 3806 i = nrecvs; 3807 while (i--) { 3808 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3809 } 3810 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3811 3812 if (scall == MAT_INITIAL_MATRIX){ 3813 /* put together the new matrix */ 3814 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 3815 3816 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3817 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3818 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 3819 b_oth->freedata = PETSC_TRUE; 3820 b_oth->nonew = 0; 3821 3822 ierr = PetscFree(bufj);CHKERRQ(ierr); 3823 if (!startsj || !bufa_ptr){ 3824 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 3825 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 3826 } else { 3827 *startsj = sstartsj; 3828 *bufa_ptr = bufa; 3829 } 3830 } 3831 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3832 3833 PetscFunctionReturn(0); 3834 } 3835 3836 #undef __FUNCT__ 3837 #define __FUNCT__ "MatGetCommunicationStructs" 3838 /*@C 3839 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 3840 3841 Not Collective 3842 3843 Input Parameters: 3844 . A - The matrix in mpiaij format 3845 3846 Output Parameter: 3847 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 3848 . colmap - A map from global column index to local index into lvec 3849 - multScatter - A scatter from the argument of a matrix-vector product to lvec 3850 3851 Level: developer 3852 3853 @*/ 3854 #if defined (PETSC_USE_CTABLE) 3855 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 3856 #else 3857 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 3858 #endif 3859 { 3860 Mat_MPIAIJ *a; 3861 3862 PetscFunctionBegin; 3863 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 3864 PetscValidPointer(lvec, 2) 3865 PetscValidPointer(colmap, 3) 3866 PetscValidPointer(multScatter, 4) 3867 a = (Mat_MPIAIJ *) A->data; 3868 if (lvec) *lvec = a->lvec; 3869 if (colmap) *colmap = a->colmap; 3870 if (multScatter) *multScatter = a->Mvctx; 3871 PetscFunctionReturn(0); 3872 } 3873 3874 /*MC 3875 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 3876 3877 Options Database Keys: 3878 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 3879 3880 Level: beginner 3881 3882 .seealso: MatCreateMPIAIJ 3883 M*/ 3884 3885 EXTERN_C_BEGIN 3886 #undef __FUNCT__ 3887 #define __FUNCT__ "MatCreate_MPIAIJ" 3888 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 3889 { 3890 Mat_MPIAIJ *b; 3891 PetscErrorCode ierr; 3892 PetscMPIInt size; 3893 3894 PetscFunctionBegin; 3895 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 3896 3897 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 3898 B->data = (void*)b; 3899 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3900 B->factor = 0; 3901 B->rmap.bs = 1; 3902 B->assembled = PETSC_FALSE; 3903 B->mapping = 0; 3904 3905 B->insertmode = NOT_SET_VALUES; 3906 b->size = size; 3907 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 3908 3909 /* build cache for off array entries formed */ 3910 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 3911 b->donotstash = PETSC_FALSE; 3912 b->colmap = 0; 3913 b->garray = 0; 3914 b->roworiented = PETSC_TRUE; 3915 3916 /* stuff used for matrix vector multiply */ 3917 b->lvec = PETSC_NULL; 3918 b->Mvctx = PETSC_NULL; 3919 3920 /* stuff for MatGetRow() */ 3921 b->rowindices = 0; 3922 b->rowvalues = 0; 3923 b->getrowactive = PETSC_FALSE; 3924 3925 3926 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3927 "MatStoreValues_MPIAIJ", 3928 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 3929 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3930 "MatRetrieveValues_MPIAIJ", 3931 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 3932 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3933 "MatGetDiagonalBlock_MPIAIJ", 3934 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 3935 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3936 "MatIsTranspose_MPIAIJ", 3937 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 3938 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 3939 "MatMPIAIJSetPreallocation_MPIAIJ", 3940 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 3941 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 3942 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 3943 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 3944 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3945 "MatDiagonalScaleLocal_MPIAIJ", 3946 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 3947 PetscFunctionReturn(0); 3948 } 3949 EXTERN_C_END 3950 3951 /* 3952 Special version for direct calls from Fortran 3953 */ 3954 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3955 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 3956 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3957 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 3958 #endif 3959 3960 /* Change these macros so can be used in void function */ 3961 #undef CHKERRQ 3962 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr) 3963 #undef SETERRQ2 3964 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr) 3965 #undef SETERRQ 3966 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr) 3967 3968 EXTERN_C_BEGIN 3969 #undef __FUNCT__ 3970 #define __FUNCT__ "matsetvaluesmpiaij_" 3971 void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv) 3972 { 3973 Mat mat = *mmat; 3974 PetscInt m = *mm, n = *mn; 3975 InsertMode addv = *maddv; 3976 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3977 PetscScalar value; 3978 PetscErrorCode ierr; 3979 3980 MatPreallocated(mat); 3981 if (mat->insertmode == NOT_SET_VALUES) { 3982 mat->insertmode = addv; 3983 } 3984 #if defined(PETSC_USE_DEBUG) 3985 else if (mat->insertmode != addv) { 3986 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3987 } 3988 #endif 3989 { 3990 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 3991 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 3992 PetscTruth roworiented = aij->roworiented; 3993 3994 /* Some Variables required in the macro */ 3995 Mat A = aij->A; 3996 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3997 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 3998 PetscScalar *aa = a->a; 3999 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 4000 Mat B = aij->B; 4001 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 4002 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n; 4003 PetscScalar *ba = b->a; 4004 4005 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 4006 PetscInt nonew = a->nonew; 4007 PetscScalar *ap1,*ap2; 4008 4009 PetscFunctionBegin; 4010 for (i=0; i<m; i++) { 4011 if (im[i] < 0) continue; 4012 #if defined(PETSC_USE_DEBUG) 4013 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 4014 #endif 4015 if (im[i] >= rstart && im[i] < rend) { 4016 row = im[i] - rstart; 4017 lastcol1 = -1; 4018 rp1 = aj + ai[row]; 4019 ap1 = aa + ai[row]; 4020 rmax1 = aimax[row]; 4021 nrow1 = ailen[row]; 4022 low1 = 0; 4023 high1 = nrow1; 4024 lastcol2 = -1; 4025 rp2 = bj + bi[row]; 4026 ap2 = ba + bi[row]; 4027 rmax2 = bimax[row]; 4028 nrow2 = bilen[row]; 4029 low2 = 0; 4030 high2 = nrow2; 4031 4032 for (j=0; j<n; j++) { 4033 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 4034 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 4035 if (in[j] >= cstart && in[j] < cend){ 4036 col = in[j] - cstart; 4037 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 4038 } else if (in[j] < 0) continue; 4039 #if defined(PETSC_USE_DEBUG) 4040 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);} 4041 #endif 4042 else { 4043 if (mat->was_assembled) { 4044 if (!aij->colmap) { 4045 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 4046 } 4047 #if defined (PETSC_USE_CTABLE) 4048 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4049 col--; 4050 #else 4051 col = aij->colmap[in[j]] - 1; 4052 #endif 4053 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 4054 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 4055 col = in[j]; 4056 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 4057 B = aij->B; 4058 b = (Mat_SeqAIJ*)B->data; 4059 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 4060 rp2 = bj + bi[row]; 4061 ap2 = ba + bi[row]; 4062 rmax2 = bimax[row]; 4063 nrow2 = bilen[row]; 4064 low2 = 0; 4065 high2 = nrow2; 4066 bm = aij->B->rmap.n; 4067 ba = b->a; 4068 } 4069 } else col = in[j]; 4070 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 4071 } 4072 } 4073 } else { 4074 if (!aij->donotstash) { 4075 if (roworiented) { 4076 if (ignorezeroentries && v[i*n] == 0.0) continue; 4077 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 4078 } else { 4079 if (ignorezeroentries && v[i] == 0.0) continue; 4080 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 4081 } 4082 } 4083 } 4084 }} 4085 PetscFunctionReturnVoid(); 4086 } 4087 EXTERN_C_END 4088