1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 4 #include "src/inline/spops.h" 5 6 /* 7 Local utility routine that creates a mapping from the global column 8 number to the local number in the off-diagonal part of the local 9 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 10 a slightly higher hash table cost; without it it is not scalable (each processor 11 has an order N integer array but is fast to acess. 12 */ 13 #undef __FUNCT__ 14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 16 { 17 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 18 PetscErrorCode ierr; 19 PetscInt n = aij->B->cmap.n,i; 20 21 PetscFunctionBegin; 22 #if defined (PETSC_USE_CTABLE) 23 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 24 for (i=0; i<n; i++){ 25 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 26 } 27 #else 28 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 29 ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr); 30 ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr); 31 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 32 #endif 33 PetscFunctionReturn(0); 34 } 35 36 37 #define CHUNKSIZE 15 38 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 39 { \ 40 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 41 lastcol1 = col;\ 42 while (high1-low1 > 5) { \ 43 t = (low1+high1)/2; \ 44 if (rp1[t] > col) high1 = t; \ 45 else low1 = t; \ 46 } \ 47 for (_i=low1; _i<high1; _i++) { \ 48 if (rp1[_i] > col) break; \ 49 if (rp1[_i] == col) { \ 50 if (addv == ADD_VALUES) ap1[_i] += value; \ 51 else ap1[_i] = value; \ 52 goto a_noinsert; \ 53 } \ 54 } \ 55 if (value == 0.0 && ignorezeroentries) goto a_noinsert; \ 56 if (nonew == 1) goto a_noinsert; \ 57 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 58 MatSeqXAIJReallocateAIJ(a,1,nrow1,row,col,rmax1,aa,ai,aj,am,rp1,ap1,aimax,nonew); \ 59 N = nrow1++ - 1; a->nz++; high1++; \ 60 /* shift up all the later entries in this row */ \ 61 for (ii=N; ii>=_i; ii--) { \ 62 rp1[ii+1] = rp1[ii]; \ 63 ap1[ii+1] = ap1[ii]; \ 64 } \ 65 rp1[_i] = col; \ 66 ap1[_i] = value; \ 67 a_noinsert: ; \ 68 ailen[row] = nrow1; \ 69 } 70 71 72 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 73 { \ 74 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 75 lastcol2 = col;\ 76 while (high2-low2 > 5) { \ 77 t = (low2+high2)/2; \ 78 if (rp2[t] > col) high2 = t; \ 79 else low2 = t; \ 80 } \ 81 for (_i=low2; _i<high2; _i++) { \ 82 if (rp2[_i] > col) break; \ 83 if (rp2[_i] == col) { \ 84 if (addv == ADD_VALUES) ap2[_i] += value; \ 85 else ap2[_i] = value; \ 86 goto b_noinsert; \ 87 } \ 88 } \ 89 if (value == 0.0 && ignorezeroentries) goto b_noinsert; \ 90 if (nonew == 1) goto b_noinsert; \ 91 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 92 MatSeqXAIJReallocateAIJ(b,1,nrow2,row,col,rmax2,ba,bi,bj,bm,rp2,ap2,bimax,nonew); \ 93 N = nrow2++ - 1; b->nz++; high2++;\ 94 /* shift up all the later entries in this row */ \ 95 for (ii=N; ii>=_i; ii--) { \ 96 rp2[ii+1] = rp2[ii]; \ 97 ap2[ii+1] = ap2[ii]; \ 98 } \ 99 rp2[_i] = col; \ 100 ap2[_i] = value; \ 101 b_noinsert: ; \ 102 bilen[row] = nrow2; \ 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 107 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 108 { 109 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 110 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 111 PetscErrorCode ierr; 112 PetscInt l,*garray = mat->garray,diag; 113 114 PetscFunctionBegin; 115 /* code only works for square matrices A */ 116 117 /* find size of row to the left of the diagonal part */ 118 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 119 row = row - diag; 120 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 121 if (garray[b->j[b->i[row]+l]] > diag) break; 122 } 123 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 124 125 /* diagonal part */ 126 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 127 128 /* right of diagonal part */ 129 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 130 PetscFunctionReturn(0); 131 } 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "MatSetValues_MPIAIJ" 135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 136 { 137 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 138 PetscScalar value; 139 PetscErrorCode ierr; 140 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 141 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 142 PetscTruth roworiented = aij->roworiented; 143 144 /* Some Variables required in the macro */ 145 Mat A = aij->A; 146 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 147 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 148 PetscScalar *aa = a->a; 149 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 150 Mat B = aij->B; 151 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 152 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n; 153 PetscScalar *ba = b->a; 154 155 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 156 PetscInt nonew = a->nonew; 157 PetscScalar *ap1,*ap2; 158 159 PetscFunctionBegin; 160 for (i=0; i<m; i++) { 161 if (im[i] < 0) continue; 162 #if defined(PETSC_USE_DEBUG) 163 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 164 #endif 165 if (im[i] >= rstart && im[i] < rend) { 166 row = im[i] - rstart; 167 lastcol1 = -1; 168 rp1 = aj + ai[row]; 169 ap1 = aa + ai[row]; 170 rmax1 = aimax[row]; 171 nrow1 = ailen[row]; 172 low1 = 0; 173 high1 = nrow1; 174 lastcol2 = -1; 175 rp2 = bj + bi[row]; 176 ap2 = ba + bi[row]; 177 rmax2 = bimax[row]; 178 nrow2 = bilen[row]; 179 low2 = 0; 180 high2 = nrow2; 181 182 for (j=0; j<n; j++) { 183 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 184 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 185 if (in[j] >= cstart && in[j] < cend){ 186 col = in[j] - cstart; 187 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 188 } else if (in[j] < 0) continue; 189 #if defined(PETSC_USE_DEBUG) 190 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);} 191 #endif 192 else { 193 if (mat->was_assembled) { 194 if (!aij->colmap) { 195 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 196 } 197 #if defined (PETSC_USE_CTABLE) 198 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 199 col--; 200 #else 201 col = aij->colmap[in[j]] - 1; 202 #endif 203 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 204 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 205 col = in[j]; 206 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 207 B = aij->B; 208 b = (Mat_SeqAIJ*)B->data; 209 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 210 rp2 = bj + bi[row]; 211 ap2 = ba + bi[row]; 212 rmax2 = bimax[row]; 213 nrow2 = bilen[row]; 214 low2 = 0; 215 high2 = nrow2; 216 bm = aij->B->rmap.n; 217 ba = b->a; 218 } 219 } else col = in[j]; 220 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 221 } 222 } 223 } else { 224 if (!aij->donotstash) { 225 if (roworiented) { 226 if (ignorezeroentries && v[i*n] == 0.0) continue; 227 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 228 } else { 229 if (ignorezeroentries && v[i] == 0.0) continue; 230 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 231 } 232 } 233 } 234 } 235 PetscFunctionReturn(0); 236 } 237 238 239 #undef __FUNCT__ 240 #define __FUNCT__ "MatGetValues_MPIAIJ" 241 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 242 { 243 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 244 PetscErrorCode ierr; 245 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 246 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 247 248 PetscFunctionBegin; 249 for (i=0; i<m; i++) { 250 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 251 if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1); 252 if (idxm[i] >= rstart && idxm[i] < rend) { 253 row = idxm[i] - rstart; 254 for (j=0; j<n; j++) { 255 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 256 if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1); 257 if (idxn[j] >= cstart && idxn[j] < cend){ 258 col = idxn[j] - cstart; 259 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 260 } else { 261 if (!aij->colmap) { 262 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 263 } 264 #if defined (PETSC_USE_CTABLE) 265 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 266 col --; 267 #else 268 col = aij->colmap[idxn[j]] - 1; 269 #endif 270 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 271 else { 272 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 273 } 274 } 275 } 276 } else { 277 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 278 } 279 } 280 PetscFunctionReturn(0); 281 } 282 283 #undef __FUNCT__ 284 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 285 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 286 { 287 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 288 PetscErrorCode ierr; 289 PetscInt nstash,reallocs; 290 InsertMode addv; 291 292 PetscFunctionBegin; 293 if (aij->donotstash) { 294 PetscFunctionReturn(0); 295 } 296 297 /* make sure all processors are either in INSERTMODE or ADDMODE */ 298 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 299 if (addv == (ADD_VALUES|INSERT_VALUES)) { 300 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 301 } 302 mat->insertmode = addv; /* in case this processor had no cache */ 303 304 ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr); 305 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 306 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 307 PetscFunctionReturn(0); 308 } 309 310 #undef __FUNCT__ 311 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 312 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 313 { 314 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 315 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 316 PetscErrorCode ierr; 317 PetscMPIInt n; 318 PetscInt i,j,rstart,ncols,flg; 319 PetscInt *row,*col,other_disassembled; 320 PetscScalar *val; 321 InsertMode addv = mat->insertmode; 322 323 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 324 PetscFunctionBegin; 325 if (!aij->donotstash) { 326 while (1) { 327 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 328 if (!flg) break; 329 330 for (i=0; i<n;) { 331 /* Now identify the consecutive vals belonging to the same row */ 332 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 333 if (j < n) ncols = j-i; 334 else ncols = n-i; 335 /* Now assemble all these values with a single function call */ 336 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 337 i = j; 338 } 339 } 340 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 341 } 342 a->compressedrow.use = PETSC_FALSE; 343 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 344 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 345 346 /* determine if any processor has disassembled, if so we must 347 also disassemble ourselfs, in order that we may reassemble. */ 348 /* 349 if nonzero structure of submatrix B cannot change then we know that 350 no processor disassembled thus we can skip this stuff 351 */ 352 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 353 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 354 if (mat->was_assembled && !other_disassembled) { 355 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 356 } 357 } 358 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 359 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 360 } 361 ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr); 362 ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */ 363 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 364 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 365 366 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 367 aij->rowvalues = 0; 368 369 /* used by MatAXPY() */ 370 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 371 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 372 373 PetscFunctionReturn(0); 374 } 375 376 #undef __FUNCT__ 377 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 378 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 379 { 380 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 381 PetscErrorCode ierr; 382 383 PetscFunctionBegin; 384 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 385 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 386 PetscFunctionReturn(0); 387 } 388 389 #undef __FUNCT__ 390 #define __FUNCT__ "MatZeroRows_MPIAIJ" 391 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 392 { 393 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 394 PetscErrorCode ierr; 395 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1; 396 PetscInt i,*owners = A->rmap.range; 397 PetscInt *nprocs,j,idx,nsends,row; 398 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 399 PetscInt *rvalues,count,base,slen,*source; 400 PetscInt *lens,*lrows,*values,rstart=A->rmap.rstart; 401 MPI_Comm comm = A->comm; 402 MPI_Request *send_waits,*recv_waits; 403 MPI_Status recv_status,*send_status; 404 #if defined(PETSC_DEBUG) 405 PetscTruth found = PETSC_FALSE; 406 #endif 407 408 PetscFunctionBegin; 409 /* first count number of contributors to each processor */ 410 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 411 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 412 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 413 j = 0; 414 for (i=0; i<N; i++) { 415 if (lastidx > (idx = rows[i])) j = 0; 416 lastidx = idx; 417 for (; j<size; j++) { 418 if (idx >= owners[j] && idx < owners[j+1]) { 419 nprocs[2*j]++; 420 nprocs[2*j+1] = 1; 421 owner[i] = j; 422 #if defined(PETSC_DEBUG) 423 found = PETSC_TRUE; 424 #endif 425 break; 426 } 427 } 428 #if defined(PETSC_DEBUG) 429 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 430 found = PETSC_FALSE; 431 #endif 432 } 433 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 434 435 /* inform other processors of number of messages and max length*/ 436 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 437 438 /* post receives: */ 439 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 440 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 441 for (i=0; i<nrecvs; i++) { 442 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 443 } 444 445 /* do sends: 446 1) starts[i] gives the starting index in svalues for stuff going to 447 the ith processor 448 */ 449 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 450 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 451 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 452 starts[0] = 0; 453 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 454 for (i=0; i<N; i++) { 455 svalues[starts[owner[i]]++] = rows[i]; 456 } 457 458 starts[0] = 0; 459 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 460 count = 0; 461 for (i=0; i<size; i++) { 462 if (nprocs[2*i+1]) { 463 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 464 } 465 } 466 ierr = PetscFree(starts);CHKERRQ(ierr); 467 468 base = owners[rank]; 469 470 /* wait on receives */ 471 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 472 source = lens + nrecvs; 473 count = nrecvs; slen = 0; 474 while (count) { 475 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 476 /* unpack receives into our local space */ 477 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 478 source[imdex] = recv_status.MPI_SOURCE; 479 lens[imdex] = n; 480 slen += n; 481 count--; 482 } 483 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 484 485 /* move the data into the send scatter */ 486 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 487 count = 0; 488 for (i=0; i<nrecvs; i++) { 489 values = rvalues + i*nmax; 490 for (j=0; j<lens[i]; j++) { 491 lrows[count++] = values[j] - base; 492 } 493 } 494 ierr = PetscFree(rvalues);CHKERRQ(ierr); 495 ierr = PetscFree(lens);CHKERRQ(ierr); 496 ierr = PetscFree(owner);CHKERRQ(ierr); 497 ierr = PetscFree(nprocs);CHKERRQ(ierr); 498 499 /* actually zap the local rows */ 500 /* 501 Zero the required rows. If the "diagonal block" of the matrix 502 is square and the user wishes to set the diagonal we use separate 503 code so that MatSetValues() is not called for each diagonal allocating 504 new memory, thus calling lots of mallocs and slowing things down. 505 506 Contributed by: Matthew Knepley 507 */ 508 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 509 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 510 if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) { 511 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 512 } else if (diag != 0.0) { 513 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 514 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 515 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 516 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 517 } 518 for (i = 0; i < slen; i++) { 519 row = lrows[i] + rstart; 520 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 521 } 522 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 523 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 524 } else { 525 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 526 } 527 ierr = PetscFree(lrows);CHKERRQ(ierr); 528 529 /* wait on sends */ 530 if (nsends) { 531 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 532 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 533 ierr = PetscFree(send_status);CHKERRQ(ierr); 534 } 535 ierr = PetscFree(send_waits);CHKERRQ(ierr); 536 ierr = PetscFree(svalues);CHKERRQ(ierr); 537 538 PetscFunctionReturn(0); 539 } 540 541 #undef __FUNCT__ 542 #define __FUNCT__ "MatMult_MPIAIJ" 543 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 544 { 545 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 546 PetscErrorCode ierr; 547 PetscInt nt; 548 549 PetscFunctionBegin; 550 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 551 if (nt != A->cmap.n) { 552 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt); 553 } 554 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 555 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 556 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 557 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 558 PetscFunctionReturn(0); 559 } 560 561 #undef __FUNCT__ 562 #define __FUNCT__ "MatMultAdd_MPIAIJ" 563 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 564 { 565 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 566 PetscErrorCode ierr; 567 568 PetscFunctionBegin; 569 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 570 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 571 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 572 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 573 PetscFunctionReturn(0); 574 } 575 576 #undef __FUNCT__ 577 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 578 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 579 { 580 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 581 PetscErrorCode ierr; 582 PetscTruth merged; 583 584 PetscFunctionBegin; 585 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 586 /* do nondiagonal part */ 587 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 588 if (!merged) { 589 /* send it on its way */ 590 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 591 /* do local part */ 592 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 593 /* receive remote parts: note this assumes the values are not actually */ 594 /* added in yy until the next line, */ 595 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 596 } else { 597 /* do local part */ 598 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 599 /* send it on its way */ 600 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 601 /* values actually were received in the Begin() but we need to call this nop */ 602 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 603 } 604 PetscFunctionReturn(0); 605 } 606 607 EXTERN_C_BEGIN 608 #undef __FUNCT__ 609 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 610 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 611 { 612 MPI_Comm comm; 613 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 614 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 615 IS Me,Notme; 616 PetscErrorCode ierr; 617 PetscInt M,N,first,last,*notme,i; 618 PetscMPIInt size; 619 620 PetscFunctionBegin; 621 622 /* Easy test: symmetric diagonal block */ 623 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 624 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 625 if (!*f) PetscFunctionReturn(0); 626 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 627 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 628 if (size == 1) PetscFunctionReturn(0); 629 630 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 631 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 632 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 633 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 634 for (i=0; i<first; i++) notme[i] = i; 635 for (i=last; i<M; i++) notme[i-last+first] = i; 636 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 637 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 638 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 639 Aoff = Aoffs[0]; 640 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 641 Boff = Boffs[0]; 642 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 643 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 644 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 645 ierr = ISDestroy(Me);CHKERRQ(ierr); 646 ierr = ISDestroy(Notme);CHKERRQ(ierr); 647 648 PetscFunctionReturn(0); 649 } 650 EXTERN_C_END 651 652 #undef __FUNCT__ 653 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 654 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 655 { 656 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 657 PetscErrorCode ierr; 658 659 PetscFunctionBegin; 660 /* do nondiagonal part */ 661 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 662 /* send it on its way */ 663 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 664 /* do local part */ 665 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 666 /* receive remote parts */ 667 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 668 PetscFunctionReturn(0); 669 } 670 671 /* 672 This only works correctly for square matrices where the subblock A->A is the 673 diagonal block 674 */ 675 #undef __FUNCT__ 676 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 677 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 678 { 679 PetscErrorCode ierr; 680 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 681 682 PetscFunctionBegin; 683 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 684 if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) { 685 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 686 } 687 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 688 PetscFunctionReturn(0); 689 } 690 691 #undef __FUNCT__ 692 #define __FUNCT__ "MatScale_MPIAIJ" 693 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 694 { 695 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 696 PetscErrorCode ierr; 697 698 PetscFunctionBegin; 699 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 700 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 701 PetscFunctionReturn(0); 702 } 703 704 #undef __FUNCT__ 705 #define __FUNCT__ "MatDestroy_MPIAIJ" 706 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 707 { 708 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 709 PetscErrorCode ierr; 710 711 PetscFunctionBegin; 712 #if defined(PETSC_USE_LOG) 713 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N); 714 #endif 715 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 716 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 717 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 718 #if defined (PETSC_USE_CTABLE) 719 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 720 #else 721 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 722 #endif 723 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 724 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 725 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 726 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 727 ierr = PetscFree(aij);CHKERRQ(ierr); 728 729 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 730 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 731 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 732 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 733 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 734 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 735 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 736 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr); 737 PetscFunctionReturn(0); 738 } 739 740 #undef __FUNCT__ 741 #define __FUNCT__ "MatView_MPIAIJ_Binary" 742 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 743 { 744 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 745 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 746 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 747 PetscErrorCode ierr; 748 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 749 int fd; 750 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 751 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz; 752 PetscScalar *column_values; 753 754 PetscFunctionBegin; 755 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 756 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 757 nz = A->nz + B->nz; 758 if (!rank) { 759 header[0] = MAT_FILE_COOKIE; 760 header[1] = mat->rmap.N; 761 header[2] = mat->cmap.N; 762 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 763 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 764 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 765 /* get largest number of rows any processor has */ 766 rlen = mat->rmap.n; 767 range = mat->rmap.range; 768 for (i=1; i<size; i++) { 769 rlen = PetscMax(rlen,range[i+1] - range[i]); 770 } 771 } else { 772 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 773 rlen = mat->rmap.n; 774 } 775 776 /* load up the local row counts */ 777 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 778 for (i=0; i<mat->rmap.n; i++) { 779 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 780 } 781 782 /* store the row lengths to the file */ 783 if (!rank) { 784 MPI_Status status; 785 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 786 for (i=1; i<size; i++) { 787 rlen = range[i+1] - range[i]; 788 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 789 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 790 } 791 } else { 792 ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 793 } 794 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 795 796 /* load up the local column indices */ 797 nzmax = nz; /* )th processor needs space a largest processor needs */ 798 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 799 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 800 cnt = 0; 801 for (i=0; i<mat->rmap.n; i++) { 802 for (j=B->i[i]; j<B->i[i+1]; j++) { 803 if ( (col = garray[B->j[j]]) > cstart) break; 804 column_indices[cnt++] = col; 805 } 806 for (k=A->i[i]; k<A->i[i+1]; k++) { 807 column_indices[cnt++] = A->j[k] + cstart; 808 } 809 for (; j<B->i[i+1]; j++) { 810 column_indices[cnt++] = garray[B->j[j]]; 811 } 812 } 813 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 814 815 /* store the column indices to the file */ 816 if (!rank) { 817 MPI_Status status; 818 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 819 for (i=1; i<size; i++) { 820 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 821 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 822 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 823 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 824 } 825 } else { 826 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 827 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 828 } 829 ierr = PetscFree(column_indices);CHKERRQ(ierr); 830 831 /* load up the local column values */ 832 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 833 cnt = 0; 834 for (i=0; i<mat->rmap.n; i++) { 835 for (j=B->i[i]; j<B->i[i+1]; j++) { 836 if ( garray[B->j[j]] > cstart) break; 837 column_values[cnt++] = B->a[j]; 838 } 839 for (k=A->i[i]; k<A->i[i+1]; k++) { 840 column_values[cnt++] = A->a[k]; 841 } 842 for (; j<B->i[i+1]; j++) { 843 column_values[cnt++] = B->a[j]; 844 } 845 } 846 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 847 848 /* store the column values to the file */ 849 if (!rank) { 850 MPI_Status status; 851 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 852 for (i=1; i<size; i++) { 853 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 854 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 855 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 856 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 857 } 858 } else { 859 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 860 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 861 } 862 ierr = PetscFree(column_values);CHKERRQ(ierr); 863 PetscFunctionReturn(0); 864 } 865 866 #undef __FUNCT__ 867 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 868 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 869 { 870 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 871 PetscErrorCode ierr; 872 PetscMPIInt rank = aij->rank,size = aij->size; 873 PetscTruth isdraw,iascii,isbinary; 874 PetscViewer sviewer; 875 PetscViewerFormat format; 876 877 PetscFunctionBegin; 878 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 879 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 880 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 881 if (iascii) { 882 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 883 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 884 MatInfo info; 885 PetscTruth inodes; 886 887 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 888 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 889 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 890 if (!inodes) { 891 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 892 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 893 } else { 894 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 895 rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 896 } 897 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 898 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 899 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 900 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 901 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 902 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 903 PetscFunctionReturn(0); 904 } else if (format == PETSC_VIEWER_ASCII_INFO) { 905 PetscInt inodecount,inodelimit,*inodes; 906 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 907 if (inodes) { 908 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 909 } else { 910 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 911 } 912 PetscFunctionReturn(0); 913 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 914 PetscFunctionReturn(0); 915 } 916 } else if (isbinary) { 917 if (size == 1) { 918 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 919 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 920 } else { 921 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 922 } 923 PetscFunctionReturn(0); 924 } else if (isdraw) { 925 PetscDraw draw; 926 PetscTruth isnull; 927 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 928 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 929 } 930 931 if (size == 1) { 932 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 933 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 934 } else { 935 /* assemble the entire matrix onto first processor. */ 936 Mat A; 937 Mat_SeqAIJ *Aloc; 938 PetscInt M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct; 939 PetscScalar *a; 940 941 ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 942 if (!rank) { 943 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 944 } else { 945 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 946 } 947 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 948 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 949 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 950 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 951 952 /* copy over the A part */ 953 Aloc = (Mat_SeqAIJ*)aij->A->data; 954 m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 955 row = mat->rmap.rstart; 956 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;} 957 for (i=0; i<m; i++) { 958 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 959 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 960 } 961 aj = Aloc->j; 962 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;} 963 964 /* copy over the B part */ 965 Aloc = (Mat_SeqAIJ*)aij->B->data; 966 m = aij->B->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 967 row = mat->rmap.rstart; 968 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 969 ct = cols; 970 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 971 for (i=0; i<m; i++) { 972 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 973 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 974 } 975 ierr = PetscFree(ct);CHKERRQ(ierr); 976 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 977 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 978 /* 979 Everyone has to call to draw the matrix since the graphics waits are 980 synchronized across all processors that share the PetscDraw object 981 */ 982 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 983 if (!rank) { 984 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 985 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 986 } 987 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 988 ierr = MatDestroy(A);CHKERRQ(ierr); 989 } 990 PetscFunctionReturn(0); 991 } 992 993 #undef __FUNCT__ 994 #define __FUNCT__ "MatView_MPIAIJ" 995 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 996 { 997 PetscErrorCode ierr; 998 PetscTruth iascii,isdraw,issocket,isbinary; 999 1000 PetscFunctionBegin; 1001 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1002 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1003 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1004 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1005 if (iascii || isdraw || isbinary || issocket) { 1006 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1007 } else { 1008 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1009 } 1010 PetscFunctionReturn(0); 1011 } 1012 1013 1014 1015 #undef __FUNCT__ 1016 #define __FUNCT__ "MatRelax_MPIAIJ" 1017 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1018 { 1019 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1020 PetscErrorCode ierr; 1021 Vec bb1; 1022 1023 PetscFunctionBegin; 1024 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1025 1026 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1027 1028 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1029 if (flag & SOR_ZERO_INITIAL_GUESS) { 1030 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1031 its--; 1032 } 1033 1034 while (its--) { 1035 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1036 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1037 1038 /* update rhs: bb1 = bb - B*x */ 1039 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1040 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1041 1042 /* local sweep */ 1043 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1044 CHKERRQ(ierr); 1045 } 1046 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1047 if (flag & SOR_ZERO_INITIAL_GUESS) { 1048 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1049 its--; 1050 } 1051 while (its--) { 1052 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1053 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1054 1055 /* update rhs: bb1 = bb - B*x */ 1056 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1057 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1058 1059 /* local sweep */ 1060 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1061 CHKERRQ(ierr); 1062 } 1063 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1064 if (flag & SOR_ZERO_INITIAL_GUESS) { 1065 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1066 its--; 1067 } 1068 while (its--) { 1069 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1070 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1071 1072 /* update rhs: bb1 = bb - B*x */ 1073 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1074 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1075 1076 /* local sweep */ 1077 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1078 CHKERRQ(ierr); 1079 } 1080 } else { 1081 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1082 } 1083 1084 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1085 PetscFunctionReturn(0); 1086 } 1087 1088 #undef __FUNCT__ 1089 #define __FUNCT__ "MatPermute_MPIAIJ" 1090 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1091 { 1092 MPI_Comm comm,pcomm; 1093 PetscInt first,local_size,nrows,*rows; 1094 int ntids; 1095 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1096 PetscErrorCode ierr; 1097 1098 PetscFunctionBegin; 1099 ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr); 1100 /* make a collective version of 'rowp' */ 1101 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr); 1102 if (pcomm==comm) { 1103 crowp = rowp; 1104 } else { 1105 ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr); 1106 ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr); 1107 ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr); 1108 ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr); 1109 } 1110 /* collect the global row permutation and invert it */ 1111 ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr); 1112 ierr = ISSetPermutation(growp); CHKERRQ(ierr); 1113 if (pcomm!=comm) { 1114 ierr = ISDestroy(crowp); CHKERRQ(ierr); 1115 } 1116 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1117 /* get the local target indices */ 1118 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr); 1119 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr); 1120 ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr); 1121 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr); 1122 ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr); 1123 ierr = ISDestroy(irowp); CHKERRQ(ierr); 1124 /* the column permutation is so much easier; 1125 make a local version of 'colp' and invert it */ 1126 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr); 1127 ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr); 1128 if (ntids==1) { 1129 lcolp = colp; 1130 } else { 1131 ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr); 1132 ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr); 1133 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr); 1134 } 1135 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr); 1136 ierr = ISSetPermutation(lcolp); CHKERRQ(ierr); 1137 if (ntids>1) { 1138 ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr); 1139 ierr = ISDestroy(lcolp); CHKERRQ(ierr); 1140 } 1141 /* now we just get the submatrix */ 1142 ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr); 1143 /* clean up */ 1144 ierr = ISDestroy(lrowp); CHKERRQ(ierr); 1145 ierr = ISDestroy(icolp); CHKERRQ(ierr); 1146 PetscFunctionReturn(0); 1147 } 1148 1149 #undef __FUNCT__ 1150 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1151 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1152 { 1153 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1154 Mat A = mat->A,B = mat->B; 1155 PetscErrorCode ierr; 1156 PetscReal isend[5],irecv[5]; 1157 1158 PetscFunctionBegin; 1159 info->block_size = 1.0; 1160 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1161 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1162 isend[3] = info->memory; isend[4] = info->mallocs; 1163 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1164 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1165 isend[3] += info->memory; isend[4] += info->mallocs; 1166 if (flag == MAT_LOCAL) { 1167 info->nz_used = isend[0]; 1168 info->nz_allocated = isend[1]; 1169 info->nz_unneeded = isend[2]; 1170 info->memory = isend[3]; 1171 info->mallocs = isend[4]; 1172 } else if (flag == MAT_GLOBAL_MAX) { 1173 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1174 info->nz_used = irecv[0]; 1175 info->nz_allocated = irecv[1]; 1176 info->nz_unneeded = irecv[2]; 1177 info->memory = irecv[3]; 1178 info->mallocs = irecv[4]; 1179 } else if (flag == MAT_GLOBAL_SUM) { 1180 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1181 info->nz_used = irecv[0]; 1182 info->nz_allocated = irecv[1]; 1183 info->nz_unneeded = irecv[2]; 1184 info->memory = irecv[3]; 1185 info->mallocs = irecv[4]; 1186 } 1187 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1188 info->fill_ratio_needed = 0; 1189 info->factor_mallocs = 0; 1190 info->rows_global = (double)matin->rmap.N; 1191 info->columns_global = (double)matin->cmap.N; 1192 info->rows_local = (double)matin->rmap.n; 1193 info->columns_local = (double)matin->cmap.N; 1194 1195 PetscFunctionReturn(0); 1196 } 1197 1198 #undef __FUNCT__ 1199 #define __FUNCT__ "MatSetOption_MPIAIJ" 1200 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op) 1201 { 1202 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1203 PetscErrorCode ierr; 1204 1205 PetscFunctionBegin; 1206 switch (op) { 1207 case MAT_NO_NEW_NONZERO_LOCATIONS: 1208 case MAT_YES_NEW_NONZERO_LOCATIONS: 1209 case MAT_COLUMNS_UNSORTED: 1210 case MAT_COLUMNS_SORTED: 1211 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1212 case MAT_KEEP_ZEROED_ROWS: 1213 case MAT_NEW_NONZERO_LOCATION_ERR: 1214 case MAT_USE_INODES: 1215 case MAT_DO_NOT_USE_INODES: 1216 case MAT_IGNORE_ZERO_ENTRIES: 1217 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1218 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1219 break; 1220 case MAT_ROW_ORIENTED: 1221 a->roworiented = PETSC_TRUE; 1222 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1223 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1224 break; 1225 case MAT_ROWS_SORTED: 1226 case MAT_ROWS_UNSORTED: 1227 case MAT_YES_NEW_DIAGONALS: 1228 ierr = PetscInfo(A,"Option ignored\n");CHKERRQ(ierr); 1229 break; 1230 case MAT_COLUMN_ORIENTED: 1231 a->roworiented = PETSC_FALSE; 1232 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1233 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1234 break; 1235 case MAT_IGNORE_OFF_PROC_ENTRIES: 1236 a->donotstash = PETSC_TRUE; 1237 break; 1238 case MAT_NO_NEW_DIAGONALS: 1239 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1240 case MAT_SYMMETRIC: 1241 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1242 break; 1243 case MAT_STRUCTURALLY_SYMMETRIC: 1244 case MAT_HERMITIAN: 1245 case MAT_SYMMETRY_ETERNAL: 1246 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1247 break; 1248 case MAT_NOT_SYMMETRIC: 1249 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1250 case MAT_NOT_HERMITIAN: 1251 case MAT_NOT_SYMMETRY_ETERNAL: 1252 break; 1253 default: 1254 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 1255 } 1256 PetscFunctionReturn(0); 1257 } 1258 1259 #undef __FUNCT__ 1260 #define __FUNCT__ "MatGetRow_MPIAIJ" 1261 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1262 { 1263 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1264 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1265 PetscErrorCode ierr; 1266 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart; 1267 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend; 1268 PetscInt *cmap,*idx_p; 1269 1270 PetscFunctionBegin; 1271 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1272 mat->getrowactive = PETSC_TRUE; 1273 1274 if (!mat->rowvalues && (idx || v)) { 1275 /* 1276 allocate enough space to hold information from the longest row. 1277 */ 1278 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1279 PetscInt max = 1,tmp; 1280 for (i=0; i<matin->rmap.n; i++) { 1281 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1282 if (max < tmp) { max = tmp; } 1283 } 1284 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1285 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1286 } 1287 1288 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1289 lrow = row - rstart; 1290 1291 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1292 if (!v) {pvA = 0; pvB = 0;} 1293 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1294 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1295 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1296 nztot = nzA + nzB; 1297 1298 cmap = mat->garray; 1299 if (v || idx) { 1300 if (nztot) { 1301 /* Sort by increasing column numbers, assuming A and B already sorted */ 1302 PetscInt imark = -1; 1303 if (v) { 1304 *v = v_p = mat->rowvalues; 1305 for (i=0; i<nzB; i++) { 1306 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1307 else break; 1308 } 1309 imark = i; 1310 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1311 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1312 } 1313 if (idx) { 1314 *idx = idx_p = mat->rowindices; 1315 if (imark > -1) { 1316 for (i=0; i<imark; i++) { 1317 idx_p[i] = cmap[cworkB[i]]; 1318 } 1319 } else { 1320 for (i=0; i<nzB; i++) { 1321 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1322 else break; 1323 } 1324 imark = i; 1325 } 1326 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1327 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1328 } 1329 } else { 1330 if (idx) *idx = 0; 1331 if (v) *v = 0; 1332 } 1333 } 1334 *nz = nztot; 1335 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1336 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1337 PetscFunctionReturn(0); 1338 } 1339 1340 #undef __FUNCT__ 1341 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1342 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1343 { 1344 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1345 1346 PetscFunctionBegin; 1347 if (!aij->getrowactive) { 1348 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1349 } 1350 aij->getrowactive = PETSC_FALSE; 1351 PetscFunctionReturn(0); 1352 } 1353 1354 #undef __FUNCT__ 1355 #define __FUNCT__ "MatNorm_MPIAIJ" 1356 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1357 { 1358 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1359 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1360 PetscErrorCode ierr; 1361 PetscInt i,j,cstart = mat->cmap.rstart; 1362 PetscReal sum = 0.0; 1363 PetscScalar *v; 1364 1365 PetscFunctionBegin; 1366 if (aij->size == 1) { 1367 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1368 } else { 1369 if (type == NORM_FROBENIUS) { 1370 v = amat->a; 1371 for (i=0; i<amat->nz; i++) { 1372 #if defined(PETSC_USE_COMPLEX) 1373 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1374 #else 1375 sum += (*v)*(*v); v++; 1376 #endif 1377 } 1378 v = bmat->a; 1379 for (i=0; i<bmat->nz; i++) { 1380 #if defined(PETSC_USE_COMPLEX) 1381 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1382 #else 1383 sum += (*v)*(*v); v++; 1384 #endif 1385 } 1386 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1387 *norm = sqrt(*norm); 1388 } else if (type == NORM_1) { /* max column norm */ 1389 PetscReal *tmp,*tmp2; 1390 PetscInt *jj,*garray = aij->garray; 1391 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1392 ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1393 ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr); 1394 *norm = 0.0; 1395 v = amat->a; jj = amat->j; 1396 for (j=0; j<amat->nz; j++) { 1397 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1398 } 1399 v = bmat->a; jj = bmat->j; 1400 for (j=0; j<bmat->nz; j++) { 1401 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1402 } 1403 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1404 for (j=0; j<mat->cmap.N; j++) { 1405 if (tmp2[j] > *norm) *norm = tmp2[j]; 1406 } 1407 ierr = PetscFree(tmp);CHKERRQ(ierr); 1408 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1409 } else if (type == NORM_INFINITY) { /* max row norm */ 1410 PetscReal ntemp = 0.0; 1411 for (j=0; j<aij->A->rmap.n; j++) { 1412 v = amat->a + amat->i[j]; 1413 sum = 0.0; 1414 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1415 sum += PetscAbsScalar(*v); v++; 1416 } 1417 v = bmat->a + bmat->i[j]; 1418 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1419 sum += PetscAbsScalar(*v); v++; 1420 } 1421 if (sum > ntemp) ntemp = sum; 1422 } 1423 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1424 } else { 1425 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1426 } 1427 } 1428 PetscFunctionReturn(0); 1429 } 1430 1431 #undef __FUNCT__ 1432 #define __FUNCT__ "MatTranspose_MPIAIJ" 1433 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1434 { 1435 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1436 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1437 PetscErrorCode ierr; 1438 PetscInt M = A->rmap.N,N = A->cmap.N,m,*ai,*aj,row,*cols,i,*ct; 1439 Mat B; 1440 PetscScalar *array; 1441 1442 PetscFunctionBegin; 1443 if (!matout && M != N) { 1444 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1445 } 1446 1447 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 1448 ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr); 1449 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1450 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1451 1452 /* copy over the A part */ 1453 Aloc = (Mat_SeqAIJ*)a->A->data; 1454 m = a->A->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1455 row = A->rmap.rstart; 1456 for (i=0; i<ai[m]; i++) {aj[i] += A->cmap.rstart ;} 1457 for (i=0; i<m; i++) { 1458 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1459 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1460 } 1461 aj = Aloc->j; 1462 for (i=0; i<ai[m]; i++) {aj[i] -= A->cmap.rstart ;} 1463 1464 /* copy over the B part */ 1465 Aloc = (Mat_SeqAIJ*)a->B->data; 1466 m = a->B->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1467 row = A->rmap.rstart; 1468 ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1469 ct = cols; 1470 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1471 for (i=0; i<m; i++) { 1472 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1473 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1474 } 1475 ierr = PetscFree(ct);CHKERRQ(ierr); 1476 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1477 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1478 if (matout) { 1479 *matout = B; 1480 } else { 1481 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1482 } 1483 PetscFunctionReturn(0); 1484 } 1485 1486 #undef __FUNCT__ 1487 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1488 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1489 { 1490 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1491 Mat a = aij->A,b = aij->B; 1492 PetscErrorCode ierr; 1493 PetscInt s1,s2,s3; 1494 1495 PetscFunctionBegin; 1496 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1497 if (rr) { 1498 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1499 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1500 /* Overlap communication with computation. */ 1501 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1502 } 1503 if (ll) { 1504 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1505 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1506 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1507 } 1508 /* scale the diagonal block */ 1509 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1510 1511 if (rr) { 1512 /* Do a scatter end and then right scale the off-diagonal block */ 1513 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1514 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1515 } 1516 1517 PetscFunctionReturn(0); 1518 } 1519 1520 1521 #undef __FUNCT__ 1522 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1523 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A) 1524 { 1525 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1526 PetscErrorCode ierr; 1527 1528 PetscFunctionBegin; 1529 if (!a->rank) { 1530 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1531 } 1532 PetscFunctionReturn(0); 1533 } 1534 1535 #undef __FUNCT__ 1536 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1537 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1538 { 1539 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1540 PetscErrorCode ierr; 1541 1542 PetscFunctionBegin; 1543 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1544 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1545 PetscFunctionReturn(0); 1546 } 1547 #undef __FUNCT__ 1548 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1549 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1550 { 1551 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1552 PetscErrorCode ierr; 1553 1554 PetscFunctionBegin; 1555 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1556 PetscFunctionReturn(0); 1557 } 1558 1559 #undef __FUNCT__ 1560 #define __FUNCT__ "MatEqual_MPIAIJ" 1561 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1562 { 1563 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1564 Mat a,b,c,d; 1565 PetscTruth flg; 1566 PetscErrorCode ierr; 1567 1568 PetscFunctionBegin; 1569 a = matA->A; b = matA->B; 1570 c = matB->A; d = matB->B; 1571 1572 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1573 if (flg) { 1574 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1575 } 1576 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1577 PetscFunctionReturn(0); 1578 } 1579 1580 #undef __FUNCT__ 1581 #define __FUNCT__ "MatCopy_MPIAIJ" 1582 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1583 { 1584 PetscErrorCode ierr; 1585 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1586 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1587 1588 PetscFunctionBegin; 1589 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1590 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1591 /* because of the column compression in the off-processor part of the matrix a->B, 1592 the number of columns in a->B and b->B may be different, hence we cannot call 1593 the MatCopy() directly on the two parts. If need be, we can provide a more 1594 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1595 then copying the submatrices */ 1596 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1597 } else { 1598 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1599 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1600 } 1601 PetscFunctionReturn(0); 1602 } 1603 1604 #undef __FUNCT__ 1605 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1606 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1607 { 1608 PetscErrorCode ierr; 1609 1610 PetscFunctionBegin; 1611 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1612 PetscFunctionReturn(0); 1613 } 1614 1615 #include "petscblaslapack.h" 1616 #undef __FUNCT__ 1617 #define __FUNCT__ "MatAXPY_MPIAIJ" 1618 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1619 { 1620 PetscErrorCode ierr; 1621 PetscInt i; 1622 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1623 PetscBLASInt bnz,one=1; 1624 Mat_SeqAIJ *x,*y; 1625 1626 PetscFunctionBegin; 1627 if (str == SAME_NONZERO_PATTERN) { 1628 PetscScalar alpha = a; 1629 x = (Mat_SeqAIJ *)xx->A->data; 1630 y = (Mat_SeqAIJ *)yy->A->data; 1631 bnz = (PetscBLASInt)x->nz; 1632 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1633 x = (Mat_SeqAIJ *)xx->B->data; 1634 y = (Mat_SeqAIJ *)yy->B->data; 1635 bnz = (PetscBLASInt)x->nz; 1636 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1637 } else if (str == SUBSET_NONZERO_PATTERN) { 1638 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1639 1640 x = (Mat_SeqAIJ *)xx->B->data; 1641 y = (Mat_SeqAIJ *)yy->B->data; 1642 if (y->xtoy && y->XtoY != xx->B) { 1643 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1644 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1645 } 1646 if (!y->xtoy) { /* get xtoy */ 1647 ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1648 y->XtoY = xx->B; 1649 } 1650 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1651 } else { 1652 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1653 } 1654 PetscFunctionReturn(0); 1655 } 1656 1657 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1658 1659 #undef __FUNCT__ 1660 #define __FUNCT__ "MatConjugate_MPIAIJ" 1661 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1662 { 1663 #if defined(PETSC_USE_COMPLEX) 1664 PetscErrorCode ierr; 1665 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1666 1667 PetscFunctionBegin; 1668 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1669 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1670 #else 1671 PetscFunctionBegin; 1672 #endif 1673 PetscFunctionReturn(0); 1674 } 1675 1676 #undef __FUNCT__ 1677 #define __FUNCT__ "MatRealPart_MPIAIJ" 1678 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1679 { 1680 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1681 PetscErrorCode ierr; 1682 1683 PetscFunctionBegin; 1684 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1685 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1686 PetscFunctionReturn(0); 1687 } 1688 1689 #undef __FUNCT__ 1690 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1691 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1692 { 1693 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1694 PetscErrorCode ierr; 1695 1696 PetscFunctionBegin; 1697 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1698 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1699 PetscFunctionReturn(0); 1700 } 1701 1702 /* -------------------------------------------------------------------*/ 1703 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1704 MatGetRow_MPIAIJ, 1705 MatRestoreRow_MPIAIJ, 1706 MatMult_MPIAIJ, 1707 /* 4*/ MatMultAdd_MPIAIJ, 1708 MatMultTranspose_MPIAIJ, 1709 MatMultTransposeAdd_MPIAIJ, 1710 0, 1711 0, 1712 0, 1713 /*10*/ 0, 1714 0, 1715 0, 1716 MatRelax_MPIAIJ, 1717 MatTranspose_MPIAIJ, 1718 /*15*/ MatGetInfo_MPIAIJ, 1719 MatEqual_MPIAIJ, 1720 MatGetDiagonal_MPIAIJ, 1721 MatDiagonalScale_MPIAIJ, 1722 MatNorm_MPIAIJ, 1723 /*20*/ MatAssemblyBegin_MPIAIJ, 1724 MatAssemblyEnd_MPIAIJ, 1725 0, 1726 MatSetOption_MPIAIJ, 1727 MatZeroEntries_MPIAIJ, 1728 /*25*/ MatZeroRows_MPIAIJ, 1729 0, 1730 0, 1731 0, 1732 0, 1733 /*30*/ MatSetUpPreallocation_MPIAIJ, 1734 0, 1735 0, 1736 0, 1737 0, 1738 /*35*/ MatDuplicate_MPIAIJ, 1739 0, 1740 0, 1741 0, 1742 0, 1743 /*40*/ MatAXPY_MPIAIJ, 1744 MatGetSubMatrices_MPIAIJ, 1745 MatIncreaseOverlap_MPIAIJ, 1746 MatGetValues_MPIAIJ, 1747 MatCopy_MPIAIJ, 1748 /*45*/ MatPrintHelp_MPIAIJ, 1749 MatScale_MPIAIJ, 1750 0, 1751 0, 1752 0, 1753 /*50*/ MatSetBlockSize_MPIAIJ, 1754 0, 1755 0, 1756 0, 1757 0, 1758 /*55*/ MatFDColoringCreate_MPIAIJ, 1759 0, 1760 MatSetUnfactored_MPIAIJ, 1761 MatPermute_MPIAIJ, 1762 0, 1763 /*60*/ MatGetSubMatrix_MPIAIJ, 1764 MatDestroy_MPIAIJ, 1765 MatView_MPIAIJ, 1766 0, 1767 0, 1768 /*65*/ 0, 1769 0, 1770 0, 1771 0, 1772 0, 1773 /*70*/ 0, 1774 0, 1775 MatSetColoring_MPIAIJ, 1776 #if defined(PETSC_HAVE_ADIC) 1777 MatSetValuesAdic_MPIAIJ, 1778 #else 1779 0, 1780 #endif 1781 MatSetValuesAdifor_MPIAIJ, 1782 /*75*/ 0, 1783 0, 1784 0, 1785 0, 1786 0, 1787 /*80*/ 0, 1788 0, 1789 0, 1790 0, 1791 /*84*/ MatLoad_MPIAIJ, 1792 0, 1793 0, 1794 0, 1795 0, 1796 0, 1797 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1798 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1799 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1800 MatPtAP_Basic, 1801 MatPtAPSymbolic_MPIAIJ, 1802 /*95*/ MatPtAPNumeric_MPIAIJ, 1803 0, 1804 0, 1805 0, 1806 0, 1807 /*100*/0, 1808 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1809 MatPtAPNumeric_MPIAIJ_MPIAIJ, 1810 MatConjugate_MPIAIJ, 1811 0, 1812 /*105*/MatSetValuesRow_MPIAIJ, 1813 MatRealPart_MPIAIJ, 1814 MatImaginaryPart_MPIAIJ}; 1815 1816 /* ----------------------------------------------------------------------------------------*/ 1817 1818 EXTERN_C_BEGIN 1819 #undef __FUNCT__ 1820 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1821 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 1822 { 1823 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1824 PetscErrorCode ierr; 1825 1826 PetscFunctionBegin; 1827 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1828 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1829 PetscFunctionReturn(0); 1830 } 1831 EXTERN_C_END 1832 1833 EXTERN_C_BEGIN 1834 #undef __FUNCT__ 1835 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1836 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 1837 { 1838 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1839 PetscErrorCode ierr; 1840 1841 PetscFunctionBegin; 1842 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1843 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1844 PetscFunctionReturn(0); 1845 } 1846 EXTERN_C_END 1847 1848 #include "petscpc.h" 1849 EXTERN_C_BEGIN 1850 #undef __FUNCT__ 1851 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1852 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1853 { 1854 Mat_MPIAIJ *b; 1855 PetscErrorCode ierr; 1856 PetscInt i; 1857 1858 PetscFunctionBegin; 1859 B->preallocated = PETSC_TRUE; 1860 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1861 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1862 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1863 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1864 1865 B->rmap.bs = B->cmap.bs = 1; 1866 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 1867 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 1868 if (d_nnz) { 1869 for (i=0; i<B->rmap.n; i++) { 1870 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1871 } 1872 } 1873 if (o_nnz) { 1874 for (i=0; i<B->rmap.n; i++) { 1875 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1876 } 1877 } 1878 b = (Mat_MPIAIJ*)B->data; 1879 1880 /* Explicitly create 2 MATSEQAIJ matrices. */ 1881 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 1882 ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr); 1883 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 1884 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 1885 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 1886 ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr); 1887 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 1888 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 1889 1890 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1891 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1892 1893 PetscFunctionReturn(0); 1894 } 1895 EXTERN_C_END 1896 1897 #undef __FUNCT__ 1898 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1899 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1900 { 1901 Mat mat; 1902 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1903 PetscErrorCode ierr; 1904 1905 PetscFunctionBegin; 1906 *newmat = 0; 1907 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 1908 ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr); 1909 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1910 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1911 a = (Mat_MPIAIJ*)mat->data; 1912 1913 mat->factor = matin->factor; 1914 mat->rmap.bs = matin->rmap.bs; 1915 mat->assembled = PETSC_TRUE; 1916 mat->insertmode = NOT_SET_VALUES; 1917 mat->preallocated = PETSC_TRUE; 1918 1919 a->size = oldmat->size; 1920 a->rank = oldmat->rank; 1921 a->donotstash = oldmat->donotstash; 1922 a->roworiented = oldmat->roworiented; 1923 a->rowindices = 0; 1924 a->rowvalues = 0; 1925 a->getrowactive = PETSC_FALSE; 1926 1927 ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr); 1928 ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr); 1929 1930 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1931 if (oldmat->colmap) { 1932 #if defined (PETSC_USE_CTABLE) 1933 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1934 #else 1935 ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1936 ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 1937 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr); 1938 #endif 1939 } else a->colmap = 0; 1940 if (oldmat->garray) { 1941 PetscInt len; 1942 len = oldmat->B->cmap.n; 1943 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1944 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 1945 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 1946 } else a->garray = 0; 1947 1948 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1949 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 1950 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1951 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 1952 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1953 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1954 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1955 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 1956 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1957 *newmat = mat; 1958 PetscFunctionReturn(0); 1959 } 1960 1961 #include "petscsys.h" 1962 1963 #undef __FUNCT__ 1964 #define __FUNCT__ "MatLoad_MPIAIJ" 1965 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat) 1966 { 1967 Mat A; 1968 PetscScalar *vals,*svals; 1969 MPI_Comm comm = ((PetscObject)viewer)->comm; 1970 MPI_Status status; 1971 PetscErrorCode ierr; 1972 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 1973 PetscInt i,nz,j,rstart,rend,mmax; 1974 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 1975 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1976 PetscInt cend,cstart,n,*rowners; 1977 int fd; 1978 1979 PetscFunctionBegin; 1980 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1981 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1982 if (!rank) { 1983 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1984 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1985 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1986 } 1987 1988 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1989 M = header[1]; N = header[2]; 1990 /* determine ownership of all rows */ 1991 m = M/size + ((M % size) > rank); 1992 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1993 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1994 1995 /* First process needs enough room for process with most rows */ 1996 if (!rank) { 1997 mmax = rowners[1]; 1998 for (i=2; i<size; i++) { 1999 mmax = PetscMax(mmax,rowners[i]); 2000 } 2001 } else mmax = m; 2002 2003 rowners[0] = 0; 2004 for (i=2; i<=size; i++) { 2005 mmax = PetscMax(mmax,rowners[i]); 2006 rowners[i] += rowners[i-1]; 2007 } 2008 rstart = rowners[rank]; 2009 rend = rowners[rank+1]; 2010 2011 /* distribute row lengths to all processors */ 2012 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2013 if (!rank) { 2014 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2015 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2016 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2017 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2018 for (j=0; j<m; j++) { 2019 procsnz[0] += ourlens[j]; 2020 } 2021 for (i=1; i<size; i++) { 2022 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2023 /* calculate the number of nonzeros on each processor */ 2024 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2025 procsnz[i] += rowlengths[j]; 2026 } 2027 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2028 } 2029 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2030 } else { 2031 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2032 } 2033 2034 if (!rank) { 2035 /* determine max buffer needed and allocate it */ 2036 maxnz = 0; 2037 for (i=0; i<size; i++) { 2038 maxnz = PetscMax(maxnz,procsnz[i]); 2039 } 2040 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2041 2042 /* read in my part of the matrix column indices */ 2043 nz = procsnz[0]; 2044 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2045 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2046 2047 /* read in every one elses and ship off */ 2048 for (i=1; i<size; i++) { 2049 nz = procsnz[i]; 2050 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2051 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2052 } 2053 ierr = PetscFree(cols);CHKERRQ(ierr); 2054 } else { 2055 /* determine buffer space needed for message */ 2056 nz = 0; 2057 for (i=0; i<m; i++) { 2058 nz += ourlens[i]; 2059 } 2060 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2061 2062 /* receive message of column indices*/ 2063 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2064 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2065 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2066 } 2067 2068 /* determine column ownership if matrix is not square */ 2069 if (N != M) { 2070 n = N/size + ((N % size) > rank); 2071 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2072 cstart = cend - n; 2073 } else { 2074 cstart = rstart; 2075 cend = rend; 2076 n = cend - cstart; 2077 } 2078 2079 /* loop over local rows, determining number of off diagonal entries */ 2080 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2081 jj = 0; 2082 for (i=0; i<m; i++) { 2083 for (j=0; j<ourlens[i]; j++) { 2084 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2085 jj++; 2086 } 2087 } 2088 2089 /* create our matrix */ 2090 for (i=0; i<m; i++) { 2091 ourlens[i] -= offlens[i]; 2092 } 2093 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2094 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2095 ierr = MatSetType(A,type);CHKERRQ(ierr); 2096 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2097 2098 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2099 for (i=0; i<m; i++) { 2100 ourlens[i] += offlens[i]; 2101 } 2102 2103 if (!rank) { 2104 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2105 2106 /* read in my part of the matrix numerical values */ 2107 nz = procsnz[0]; 2108 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2109 2110 /* insert into matrix */ 2111 jj = rstart; 2112 smycols = mycols; 2113 svals = vals; 2114 for (i=0; i<m; i++) { 2115 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2116 smycols += ourlens[i]; 2117 svals += ourlens[i]; 2118 jj++; 2119 } 2120 2121 /* read in other processors and ship out */ 2122 for (i=1; i<size; i++) { 2123 nz = procsnz[i]; 2124 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2125 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2126 } 2127 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2128 } else { 2129 /* receive numeric values */ 2130 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2131 2132 /* receive message of values*/ 2133 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2134 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2135 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2136 2137 /* insert into matrix */ 2138 jj = rstart; 2139 smycols = mycols; 2140 svals = vals; 2141 for (i=0; i<m; i++) { 2142 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2143 smycols += ourlens[i]; 2144 svals += ourlens[i]; 2145 jj++; 2146 } 2147 } 2148 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2149 ierr = PetscFree(vals);CHKERRQ(ierr); 2150 ierr = PetscFree(mycols);CHKERRQ(ierr); 2151 ierr = PetscFree(rowners);CHKERRQ(ierr); 2152 2153 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2154 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2155 *newmat = A; 2156 PetscFunctionReturn(0); 2157 } 2158 2159 #undef __FUNCT__ 2160 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2161 /* 2162 Not great since it makes two copies of the submatrix, first an SeqAIJ 2163 in local and then by concatenating the local matrices the end result. 2164 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2165 */ 2166 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2167 { 2168 PetscErrorCode ierr; 2169 PetscMPIInt rank,size; 2170 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2171 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2172 Mat *local,M,Mreuse; 2173 PetscScalar *vwork,*aa; 2174 MPI_Comm comm = mat->comm; 2175 Mat_SeqAIJ *aij; 2176 2177 2178 PetscFunctionBegin; 2179 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2180 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2181 2182 if (call == MAT_REUSE_MATRIX) { 2183 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2184 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2185 local = &Mreuse; 2186 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2187 } else { 2188 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2189 Mreuse = *local; 2190 ierr = PetscFree(local);CHKERRQ(ierr); 2191 } 2192 2193 /* 2194 m - number of local rows 2195 n - number of columns (same on all processors) 2196 rstart - first row in new global matrix generated 2197 */ 2198 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2199 if (call == MAT_INITIAL_MATRIX) { 2200 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2201 ii = aij->i; 2202 jj = aij->j; 2203 2204 /* 2205 Determine the number of non-zeros in the diagonal and off-diagonal 2206 portions of the matrix in order to do correct preallocation 2207 */ 2208 2209 /* first get start and end of "diagonal" columns */ 2210 if (csize == PETSC_DECIDE) { 2211 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2212 if (mglobal == n) { /* square matrix */ 2213 nlocal = m; 2214 } else { 2215 nlocal = n/size + ((n % size) > rank); 2216 } 2217 } else { 2218 nlocal = csize; 2219 } 2220 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2221 rstart = rend - nlocal; 2222 if (rank == size - 1 && rend != n) { 2223 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2224 } 2225 2226 /* next, compute all the lengths */ 2227 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2228 olens = dlens + m; 2229 for (i=0; i<m; i++) { 2230 jend = ii[i+1] - ii[i]; 2231 olen = 0; 2232 dlen = 0; 2233 for (j=0; j<jend; j++) { 2234 if (*jj < rstart || *jj >= rend) olen++; 2235 else dlen++; 2236 jj++; 2237 } 2238 olens[i] = olen; 2239 dlens[i] = dlen; 2240 } 2241 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2242 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 2243 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2244 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2245 ierr = PetscFree(dlens);CHKERRQ(ierr); 2246 } else { 2247 PetscInt ml,nl; 2248 2249 M = *newmat; 2250 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2251 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2252 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2253 /* 2254 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2255 rather than the slower MatSetValues(). 2256 */ 2257 M->was_assembled = PETSC_TRUE; 2258 M->assembled = PETSC_FALSE; 2259 } 2260 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2261 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2262 ii = aij->i; 2263 jj = aij->j; 2264 aa = aij->a; 2265 for (i=0; i<m; i++) { 2266 row = rstart + i; 2267 nz = ii[i+1] - ii[i]; 2268 cwork = jj; jj += nz; 2269 vwork = aa; aa += nz; 2270 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2271 } 2272 2273 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2274 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2275 *newmat = M; 2276 2277 /* save submatrix used in processor for next request */ 2278 if (call == MAT_INITIAL_MATRIX) { 2279 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2280 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2281 } 2282 2283 PetscFunctionReturn(0); 2284 } 2285 2286 EXTERN_C_BEGIN 2287 #undef __FUNCT__ 2288 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2289 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2290 { 2291 PetscInt m,cstart, cend,j,nnz,i,d; 2292 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 2293 const PetscInt *JJ; 2294 PetscScalar *values; 2295 PetscErrorCode ierr; 2296 2297 PetscFunctionBegin; 2298 if (I[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"I[0] must be 0 it is %D",I[0]); 2299 2300 B->rmap.bs = B->cmap.bs = 1; 2301 ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr); 2302 ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr); 2303 m = B->rmap.n; 2304 cstart = B->cmap.rstart; 2305 cend = B->cmap.rend; 2306 rstart = B->rmap.rstart; 2307 2308 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2309 o_nnz = d_nnz + m; 2310 2311 for (i=0; i<m; i++) { 2312 nnz = I[i+1]- I[i]; 2313 JJ = J + I[i]; 2314 nnz_max = PetscMax(nnz_max,nnz); 2315 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 2316 for (j=0; j<nnz; j++) { 2317 if (*JJ >= cstart) break; 2318 JJ++; 2319 } 2320 d = 0; 2321 for (; j<nnz; j++) { 2322 if (*JJ++ >= cend) break; 2323 d++; 2324 } 2325 d_nnz[i] = d; 2326 o_nnz[i] = nnz - d; 2327 } 2328 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2329 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2330 2331 if (v) values = (PetscScalar*)v; 2332 else { 2333 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2334 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2335 } 2336 2337 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2338 for (i=0; i<m; i++) { 2339 ii = i + rstart; 2340 nnz = I[i+1]- I[i]; 2341 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values+(v ? I[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2342 } 2343 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2344 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2345 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2346 2347 if (!v) { 2348 ierr = PetscFree(values);CHKERRQ(ierr); 2349 } 2350 PetscFunctionReturn(0); 2351 } 2352 EXTERN_C_END 2353 2354 #undef __FUNCT__ 2355 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2356 /*@ 2357 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2358 (the default parallel PETSc format). 2359 2360 Collective on MPI_Comm 2361 2362 Input Parameters: 2363 + B - the matrix 2364 . i - the indices into j for the start of each local row (starts with zero) 2365 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2366 - v - optional values in the matrix 2367 2368 Level: developer 2369 2370 Notes: this actually copies the values from i[], j[], and a[] to put them into PETSc's internal 2371 storage format. Thus changing the values in a[] after this call will not effect the matrix values. 2372 2373 .keywords: matrix, aij, compressed row, sparse, parallel 2374 2375 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ, 2376 MatCreateSeqAIJWithArrays() 2377 @*/ 2378 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2379 { 2380 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2381 2382 PetscFunctionBegin; 2383 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2384 if (f) { 2385 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2386 } 2387 PetscFunctionReturn(0); 2388 } 2389 2390 #undef __FUNCT__ 2391 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2392 /*@C 2393 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2394 (the default parallel PETSc format). For good matrix assembly performance 2395 the user should preallocate the matrix storage by setting the parameters 2396 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2397 performance can be increased by more than a factor of 50. 2398 2399 Collective on MPI_Comm 2400 2401 Input Parameters: 2402 + A - the matrix 2403 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2404 (same value is used for all local rows) 2405 . d_nnz - array containing the number of nonzeros in the various rows of the 2406 DIAGONAL portion of the local submatrix (possibly different for each row) 2407 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2408 The size of this array is equal to the number of local rows, i.e 'm'. 2409 You must leave room for the diagonal entry even if it is zero. 2410 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2411 submatrix (same value is used for all local rows). 2412 - o_nnz - array containing the number of nonzeros in the various rows of the 2413 OFF-DIAGONAL portion of the local submatrix (possibly different for 2414 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2415 structure. The size of this array is equal to the number 2416 of local rows, i.e 'm'. 2417 2418 If the *_nnz parameter is given then the *_nz parameter is ignored 2419 2420 The AIJ format (also called the Yale sparse matrix format or 2421 compressed row storage (CSR)), is fully compatible with standard Fortran 77 2422 storage. The stored row and column indices begin with zero. See the users manual for details. 2423 2424 The parallel matrix is partitioned such that the first m0 rows belong to 2425 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2426 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2427 2428 The DIAGONAL portion of the local submatrix of a processor can be defined 2429 as the submatrix which is obtained by extraction the part corresponding 2430 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2431 first row that belongs to the processor, and r2 is the last row belonging 2432 to the this processor. This is a square mxm matrix. The remaining portion 2433 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2434 2435 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2436 2437 Example usage: 2438 2439 Consider the following 8x8 matrix with 34 non-zero values, that is 2440 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2441 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2442 as follows: 2443 2444 .vb 2445 1 2 0 | 0 3 0 | 0 4 2446 Proc0 0 5 6 | 7 0 0 | 8 0 2447 9 0 10 | 11 0 0 | 12 0 2448 ------------------------------------- 2449 13 0 14 | 15 16 17 | 0 0 2450 Proc1 0 18 0 | 19 20 21 | 0 0 2451 0 0 0 | 22 23 0 | 24 0 2452 ------------------------------------- 2453 Proc2 25 26 27 | 0 0 28 | 29 0 2454 30 0 0 | 31 32 33 | 0 34 2455 .ve 2456 2457 This can be represented as a collection of submatrices as: 2458 2459 .vb 2460 A B C 2461 D E F 2462 G H I 2463 .ve 2464 2465 Where the submatrices A,B,C are owned by proc0, D,E,F are 2466 owned by proc1, G,H,I are owned by proc2. 2467 2468 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2469 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2470 The 'M','N' parameters are 8,8, and have the same values on all procs. 2471 2472 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2473 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2474 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2475 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2476 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2477 matrix, ans [DF] as another SeqAIJ matrix. 2478 2479 When d_nz, o_nz parameters are specified, d_nz storage elements are 2480 allocated for every row of the local diagonal submatrix, and o_nz 2481 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2482 One way to choose d_nz and o_nz is to use the max nonzerors per local 2483 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2484 In this case, the values of d_nz,o_nz are: 2485 .vb 2486 proc0 : dnz = 2, o_nz = 2 2487 proc1 : dnz = 3, o_nz = 2 2488 proc2 : dnz = 1, o_nz = 4 2489 .ve 2490 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2491 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2492 for proc3. i.e we are using 12+15+10=37 storage locations to store 2493 34 values. 2494 2495 When d_nnz, o_nnz parameters are specified, the storage is specified 2496 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2497 In the above case the values for d_nnz,o_nnz are: 2498 .vb 2499 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2500 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2501 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2502 .ve 2503 Here the space allocated is sum of all the above values i.e 34, and 2504 hence pre-allocation is perfect. 2505 2506 Level: intermediate 2507 2508 .keywords: matrix, aij, compressed row, sparse, parallel 2509 2510 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 2511 MPIAIJ 2512 @*/ 2513 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2514 { 2515 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2516 2517 PetscFunctionBegin; 2518 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2519 if (f) { 2520 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2521 } 2522 PetscFunctionReturn(0); 2523 } 2524 2525 #undef __FUNCT__ 2526 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 2527 /*@C 2528 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 2529 CSR format the local rows. 2530 2531 Collective on MPI_Comm 2532 2533 Input Parameters: 2534 + comm - MPI communicator 2535 . m - number of local rows (Cannot be PETSC_DECIDE) 2536 . n - This value should be the same as the local size used in creating the 2537 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2538 calculated if N is given) For square matrices n is almost always m. 2539 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2540 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2541 . i - row indices 2542 . j - column indices 2543 - a - matrix values 2544 2545 Output Parameter: 2546 . mat - the matrix 2547 Level: intermediate 2548 2549 Notes: 2550 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 2551 thus you CANNOT change the matrix entries by changing the values of a[] after you have 2552 called this routine. 2553 2554 The i and j indices are 0 based 2555 2556 .keywords: matrix, aij, compressed row, sparse, parallel 2557 2558 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2559 MPIAIJ, MatCreateMPIAIJ() 2560 @*/ 2561 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 2562 { 2563 PetscErrorCode ierr; 2564 2565 PetscFunctionBegin; 2566 if (i[0]) { 2567 SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2568 } 2569 if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2570 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2571 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 2572 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 2573 PetscFunctionReturn(0); 2574 } 2575 2576 #undef __FUNCT__ 2577 #define __FUNCT__ "MatCreateMPIAIJ" 2578 /*@C 2579 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2580 (the default parallel PETSc format). For good matrix assembly performance 2581 the user should preallocate the matrix storage by setting the parameters 2582 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2583 performance can be increased by more than a factor of 50. 2584 2585 Collective on MPI_Comm 2586 2587 Input Parameters: 2588 + comm - MPI communicator 2589 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2590 This value should be the same as the local size used in creating the 2591 y vector for the matrix-vector product y = Ax. 2592 . n - This value should be the same as the local size used in creating the 2593 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2594 calculated if N is given) For square matrices n is almost always m. 2595 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2596 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2597 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2598 (same value is used for all local rows) 2599 . d_nnz - array containing the number of nonzeros in the various rows of the 2600 DIAGONAL portion of the local submatrix (possibly different for each row) 2601 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2602 The size of this array is equal to the number of local rows, i.e 'm'. 2603 You must leave room for the diagonal entry even if it is zero. 2604 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2605 submatrix (same value is used for all local rows). 2606 - o_nnz - array containing the number of nonzeros in the various rows of the 2607 OFF-DIAGONAL portion of the local submatrix (possibly different for 2608 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2609 structure. The size of this array is equal to the number 2610 of local rows, i.e 'm'. 2611 2612 Output Parameter: 2613 . A - the matrix 2614 2615 Notes: 2616 If the *_nnz parameter is given then the *_nz parameter is ignored 2617 2618 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2619 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2620 storage requirements for this matrix. 2621 2622 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2623 processor than it must be used on all processors that share the object for 2624 that argument. 2625 2626 The user MUST specify either the local or global matrix dimensions 2627 (possibly both). 2628 2629 The parallel matrix is partitioned such that the first m0 rows belong to 2630 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2631 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2632 2633 The DIAGONAL portion of the local submatrix of a processor can be defined 2634 as the submatrix which is obtained by extraction the part corresponding 2635 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2636 first row that belongs to the processor, and r2 is the last row belonging 2637 to the this processor. This is a square mxm matrix. The remaining portion 2638 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2639 2640 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2641 2642 When calling this routine with a single process communicator, a matrix of 2643 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2644 type of communicator, use the construction mechanism: 2645 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 2646 2647 By default, this format uses inodes (identical nodes) when possible. 2648 We search for consecutive rows with the same nonzero structure, thereby 2649 reusing matrix information to achieve increased efficiency. 2650 2651 Options Database Keys: 2652 + -mat_no_inode - Do not use inodes 2653 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 2654 - -mat_aij_oneindex - Internally use indexing starting at 1 2655 rather than 0. Note that when calling MatSetValues(), 2656 the user still MUST index entries starting at 0! 2657 2658 2659 Example usage: 2660 2661 Consider the following 8x8 matrix with 34 non-zero values, that is 2662 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2663 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2664 as follows: 2665 2666 .vb 2667 1 2 0 | 0 3 0 | 0 4 2668 Proc0 0 5 6 | 7 0 0 | 8 0 2669 9 0 10 | 11 0 0 | 12 0 2670 ------------------------------------- 2671 13 0 14 | 15 16 17 | 0 0 2672 Proc1 0 18 0 | 19 20 21 | 0 0 2673 0 0 0 | 22 23 0 | 24 0 2674 ------------------------------------- 2675 Proc2 25 26 27 | 0 0 28 | 29 0 2676 30 0 0 | 31 32 33 | 0 34 2677 .ve 2678 2679 This can be represented as a collection of submatrices as: 2680 2681 .vb 2682 A B C 2683 D E F 2684 G H I 2685 .ve 2686 2687 Where the submatrices A,B,C are owned by proc0, D,E,F are 2688 owned by proc1, G,H,I are owned by proc2. 2689 2690 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2691 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2692 The 'M','N' parameters are 8,8, and have the same values on all procs. 2693 2694 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2695 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2696 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2697 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2698 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2699 matrix, ans [DF] as another SeqAIJ matrix. 2700 2701 When d_nz, o_nz parameters are specified, d_nz storage elements are 2702 allocated for every row of the local diagonal submatrix, and o_nz 2703 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2704 One way to choose d_nz and o_nz is to use the max nonzerors per local 2705 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2706 In this case, the values of d_nz,o_nz are: 2707 .vb 2708 proc0 : dnz = 2, o_nz = 2 2709 proc1 : dnz = 3, o_nz = 2 2710 proc2 : dnz = 1, o_nz = 4 2711 .ve 2712 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2713 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2714 for proc3. i.e we are using 12+15+10=37 storage locations to store 2715 34 values. 2716 2717 When d_nnz, o_nnz parameters are specified, the storage is specified 2718 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2719 In the above case the values for d_nnz,o_nnz are: 2720 .vb 2721 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2722 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2723 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2724 .ve 2725 Here the space allocated is sum of all the above values i.e 34, and 2726 hence pre-allocation is perfect. 2727 2728 Level: intermediate 2729 2730 .keywords: matrix, aij, compressed row, sparse, parallel 2731 2732 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2733 MPIAIJ, MatCreateMPIAIJWithArrays() 2734 @*/ 2735 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2736 { 2737 PetscErrorCode ierr; 2738 PetscMPIInt size; 2739 2740 PetscFunctionBegin; 2741 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2742 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2743 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2744 if (size > 1) { 2745 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2746 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2747 } else { 2748 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2749 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2750 } 2751 PetscFunctionReturn(0); 2752 } 2753 2754 #undef __FUNCT__ 2755 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2756 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2757 { 2758 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2759 2760 PetscFunctionBegin; 2761 *Ad = a->A; 2762 *Ao = a->B; 2763 *colmap = a->garray; 2764 PetscFunctionReturn(0); 2765 } 2766 2767 #undef __FUNCT__ 2768 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2769 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2770 { 2771 PetscErrorCode ierr; 2772 PetscInt i; 2773 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2774 2775 PetscFunctionBegin; 2776 if (coloring->ctype == IS_COLORING_LOCAL) { 2777 ISColoringValue *allcolors,*colors; 2778 ISColoring ocoloring; 2779 2780 /* set coloring for diagonal portion */ 2781 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2782 2783 /* set coloring for off-diagonal portion */ 2784 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2785 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2786 for (i=0; i<a->B->cmap.n; i++) { 2787 colors[i] = allcolors[a->garray[i]]; 2788 } 2789 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2790 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 2791 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2792 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2793 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2794 ISColoringValue *colors; 2795 PetscInt *larray; 2796 ISColoring ocoloring; 2797 2798 /* set coloring for diagonal portion */ 2799 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2800 for (i=0; i<a->A->cmap.n; i++) { 2801 larray[i] = i + A->cmap.rstart; 2802 } 2803 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2804 ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2805 for (i=0; i<a->A->cmap.n; i++) { 2806 colors[i] = coloring->colors[larray[i]]; 2807 } 2808 ierr = PetscFree(larray);CHKERRQ(ierr); 2809 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 2810 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2811 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2812 2813 /* set coloring for off-diagonal portion */ 2814 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2815 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2816 ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2817 for (i=0; i<a->B->cmap.n; i++) { 2818 colors[i] = coloring->colors[larray[i]]; 2819 } 2820 ierr = PetscFree(larray);CHKERRQ(ierr); 2821 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr); 2822 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2823 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2824 } else { 2825 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 2826 } 2827 2828 PetscFunctionReturn(0); 2829 } 2830 2831 #if defined(PETSC_HAVE_ADIC) 2832 #undef __FUNCT__ 2833 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2834 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2835 { 2836 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2837 PetscErrorCode ierr; 2838 2839 PetscFunctionBegin; 2840 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2841 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2842 PetscFunctionReturn(0); 2843 } 2844 #endif 2845 2846 #undef __FUNCT__ 2847 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2848 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 2849 { 2850 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2851 PetscErrorCode ierr; 2852 2853 PetscFunctionBegin; 2854 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2855 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2856 PetscFunctionReturn(0); 2857 } 2858 2859 #undef __FUNCT__ 2860 #define __FUNCT__ "MatMerge" 2861 /*@C 2862 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2863 matrices from each processor 2864 2865 Collective on MPI_Comm 2866 2867 Input Parameters: 2868 + comm - the communicators the parallel matrix will live on 2869 . inmat - the input sequential matrices 2870 . n - number of local columns (or PETSC_DECIDE) 2871 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2872 2873 Output Parameter: 2874 . outmat - the parallel matrix generated 2875 2876 Level: advanced 2877 2878 Notes: The number of columns of the matrix in EACH processor MUST be the same. 2879 2880 @*/ 2881 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2882 { 2883 PetscErrorCode ierr; 2884 PetscInt m,N,i,rstart,nnz,I,*dnz,*onz; 2885 PetscInt *indx; 2886 PetscScalar *values; 2887 2888 PetscFunctionBegin; 2889 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2890 if (scall == MAT_INITIAL_MATRIX){ 2891 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2892 if (n == PETSC_DECIDE){ 2893 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 2894 } 2895 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2896 rstart -= m; 2897 2898 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2899 for (i=0;i<m;i++) { 2900 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2901 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2902 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2903 } 2904 /* This routine will ONLY return MPIAIJ type matrix */ 2905 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 2906 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2907 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2908 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2909 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2910 2911 } else if (scall == MAT_REUSE_MATRIX){ 2912 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2913 } else { 2914 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 2915 } 2916 2917 for (i=0;i<m;i++) { 2918 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2919 I = i + rstart; 2920 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2921 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2922 } 2923 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2924 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2925 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2926 2927 PetscFunctionReturn(0); 2928 } 2929 2930 #undef __FUNCT__ 2931 #define __FUNCT__ "MatFileSplit" 2932 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2933 { 2934 PetscErrorCode ierr; 2935 PetscMPIInt rank; 2936 PetscInt m,N,i,rstart,nnz; 2937 size_t len; 2938 const PetscInt *indx; 2939 PetscViewer out; 2940 char *name; 2941 Mat B; 2942 const PetscScalar *values; 2943 2944 PetscFunctionBegin; 2945 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2946 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2947 /* Should this be the type of the diagonal block of A? */ 2948 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2949 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 2950 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2951 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2952 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2953 for (i=0;i<m;i++) { 2954 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2955 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2956 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2957 } 2958 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2959 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2960 2961 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2962 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2963 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2964 sprintf(name,"%s.%d",outfile,rank); 2965 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 2966 ierr = PetscFree(name); 2967 ierr = MatView(B,out);CHKERRQ(ierr); 2968 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2969 ierr = MatDestroy(B);CHKERRQ(ierr); 2970 PetscFunctionReturn(0); 2971 } 2972 2973 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 2974 #undef __FUNCT__ 2975 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 2976 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 2977 { 2978 PetscErrorCode ierr; 2979 Mat_Merge_SeqsToMPI *merge; 2980 PetscObjectContainer container; 2981 2982 PetscFunctionBegin; 2983 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2984 if (container) { 2985 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2986 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 2987 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 2988 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 2989 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 2990 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 2991 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 2992 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 2993 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 2994 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 2995 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 2996 ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr); 2997 2998 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 2999 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3000 } 3001 ierr = PetscFree(merge);CHKERRQ(ierr); 3002 3003 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3004 PetscFunctionReturn(0); 3005 } 3006 3007 #include "src/mat/utils/freespace.h" 3008 #include "petscbt.h" 3009 static PetscEvent logkey_seqstompinum = 0; 3010 #undef __FUNCT__ 3011 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 3012 /*@C 3013 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 3014 matrices from each processor 3015 3016 Collective on MPI_Comm 3017 3018 Input Parameters: 3019 + comm - the communicators the parallel matrix will live on 3020 . seqmat - the input sequential matrices 3021 . m - number of local rows (or PETSC_DECIDE) 3022 . n - number of local columns (or PETSC_DECIDE) 3023 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3024 3025 Output Parameter: 3026 . mpimat - the parallel matrix generated 3027 3028 Level: advanced 3029 3030 Notes: 3031 The dimensions of the sequential matrix in each processor MUST be the same. 3032 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 3033 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 3034 @*/ 3035 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 3036 { 3037 PetscErrorCode ierr; 3038 MPI_Comm comm=mpimat->comm; 3039 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3040 PetscMPIInt size,rank,taga,*len_s; 3041 PetscInt N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j; 3042 PetscInt proc,m; 3043 PetscInt **buf_ri,**buf_rj; 3044 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3045 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3046 MPI_Request *s_waits,*r_waits; 3047 MPI_Status *status; 3048 MatScalar *aa=a->a,**abuf_r,*ba_i; 3049 Mat_Merge_SeqsToMPI *merge; 3050 PetscObjectContainer container; 3051 3052 PetscFunctionBegin; 3053 if (!logkey_seqstompinum) { 3054 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 3055 } 3056 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3057 3058 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3059 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3060 3061 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 3062 if (container) { 3063 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 3064 } 3065 bi = merge->bi; 3066 bj = merge->bj; 3067 buf_ri = merge->buf_ri; 3068 buf_rj = merge->buf_rj; 3069 3070 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3071 owners = merge->rowmap.range; 3072 len_s = merge->len_s; 3073 3074 /* send and recv matrix values */ 3075 /*-----------------------------*/ 3076 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3077 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3078 3079 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 3080 for (proc=0,k=0; proc<size; proc++){ 3081 if (!len_s[proc]) continue; 3082 i = owners[proc]; 3083 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 3084 k++; 3085 } 3086 3087 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 3088 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 3089 ierr = PetscFree(status);CHKERRQ(ierr); 3090 3091 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3092 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3093 3094 /* insert mat values of mpimat */ 3095 /*----------------------------*/ 3096 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 3097 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3098 nextrow = buf_ri_k + merge->nrecv; 3099 nextai = nextrow + merge->nrecv; 3100 3101 for (k=0; k<merge->nrecv; k++){ 3102 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3103 nrows = *(buf_ri_k[k]); 3104 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 3105 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3106 } 3107 3108 /* set values of ba */ 3109 m = merge->rowmap.n; 3110 for (i=0; i<m; i++) { 3111 arow = owners[rank] + i; 3112 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 3113 bnzi = bi[i+1] - bi[i]; 3114 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 3115 3116 /* add local non-zero vals of this proc's seqmat into ba */ 3117 anzi = ai[arow+1] - ai[arow]; 3118 aj = a->j + ai[arow]; 3119 aa = a->a + ai[arow]; 3120 nextaj = 0; 3121 for (j=0; nextaj<anzi; j++){ 3122 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3123 ba_i[j] += aa[nextaj++]; 3124 } 3125 } 3126 3127 /* add received vals into ba */ 3128 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3129 /* i-th row */ 3130 if (i == *nextrow[k]) { 3131 anzi = *(nextai[k]+1) - *nextai[k]; 3132 aj = buf_rj[k] + *(nextai[k]); 3133 aa = abuf_r[k] + *(nextai[k]); 3134 nextaj = 0; 3135 for (j=0; nextaj<anzi; j++){ 3136 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3137 ba_i[j] += aa[nextaj++]; 3138 } 3139 } 3140 nextrow[k]++; nextai[k]++; 3141 } 3142 } 3143 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3144 } 3145 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3146 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3147 3148 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3149 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3150 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3151 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3152 PetscFunctionReturn(0); 3153 } 3154 3155 static PetscEvent logkey_seqstompisym = 0; 3156 #undef __FUNCT__ 3157 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 3158 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3159 { 3160 PetscErrorCode ierr; 3161 Mat B_mpi; 3162 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3163 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3164 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3165 PetscInt M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j; 3166 PetscInt len,proc,*dnz,*onz; 3167 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3168 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3169 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3170 MPI_Status *status; 3171 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3172 PetscBT lnkbt; 3173 Mat_Merge_SeqsToMPI *merge; 3174 PetscObjectContainer container; 3175 3176 PetscFunctionBegin; 3177 if (!logkey_seqstompisym) { 3178 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3179 } 3180 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3181 3182 /* make sure it is a PETSc comm */ 3183 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 3184 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3185 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3186 3187 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3188 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3189 3190 /* determine row ownership */ 3191 /*---------------------------------------------------------*/ 3192 merge->rowmap.n = m; 3193 merge->rowmap.N = M; 3194 merge->rowmap.bs = 1; 3195 ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr); 3196 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3197 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3198 3199 m = merge->rowmap.n; 3200 M = merge->rowmap.N; 3201 owners = merge->rowmap.range; 3202 3203 /* determine the number of messages to send, their lengths */ 3204 /*---------------------------------------------------------*/ 3205 len_s = merge->len_s; 3206 3207 len = 0; /* length of buf_si[] */ 3208 merge->nsend = 0; 3209 for (proc=0; proc<size; proc++){ 3210 len_si[proc] = 0; 3211 if (proc == rank){ 3212 len_s[proc] = 0; 3213 } else { 3214 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3215 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3216 } 3217 if (len_s[proc]) { 3218 merge->nsend++; 3219 nrows = 0; 3220 for (i=owners[proc]; i<owners[proc+1]; i++){ 3221 if (ai[i+1] > ai[i]) nrows++; 3222 } 3223 len_si[proc] = 2*(nrows+1); 3224 len += len_si[proc]; 3225 } 3226 } 3227 3228 /* determine the number and length of messages to receive for ij-structure */ 3229 /*-------------------------------------------------------------------------*/ 3230 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3231 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3232 3233 /* post the Irecv of j-structure */ 3234 /*-------------------------------*/ 3235 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 3236 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3237 3238 /* post the Isend of j-structure */ 3239 /*--------------------------------*/ 3240 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3241 sj_waits = si_waits + merge->nsend; 3242 3243 for (proc=0, k=0; proc<size; proc++){ 3244 if (!len_s[proc]) continue; 3245 i = owners[proc]; 3246 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3247 k++; 3248 } 3249 3250 /* receives and sends of j-structure are complete */ 3251 /*------------------------------------------------*/ 3252 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 3253 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 3254 3255 /* send and recv i-structure */ 3256 /*---------------------------*/ 3257 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 3258 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3259 3260 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3261 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3262 for (proc=0,k=0; proc<size; proc++){ 3263 if (!len_s[proc]) continue; 3264 /* form outgoing message for i-structure: 3265 buf_si[0]: nrows to be sent 3266 [1:nrows]: row index (global) 3267 [nrows+1:2*nrows+1]: i-structure index 3268 */ 3269 /*-------------------------------------------*/ 3270 nrows = len_si[proc]/2 - 1; 3271 buf_si_i = buf_si + nrows+1; 3272 buf_si[0] = nrows; 3273 buf_si_i[0] = 0; 3274 nrows = 0; 3275 for (i=owners[proc]; i<owners[proc+1]; i++){ 3276 anzi = ai[i+1] - ai[i]; 3277 if (anzi) { 3278 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3279 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3280 nrows++; 3281 } 3282 } 3283 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3284 k++; 3285 buf_si += len_si[proc]; 3286 } 3287 3288 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 3289 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 3290 3291 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 3292 for (i=0; i<merge->nrecv; i++){ 3293 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 3294 } 3295 3296 ierr = PetscFree(len_si);CHKERRQ(ierr); 3297 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3298 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 3299 ierr = PetscFree(si_waits);CHKERRQ(ierr); 3300 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3301 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3302 ierr = PetscFree(status);CHKERRQ(ierr); 3303 3304 /* compute a local seq matrix in each processor */ 3305 /*----------------------------------------------*/ 3306 /* allocate bi array and free space for accumulating nonzero column info */ 3307 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3308 bi[0] = 0; 3309 3310 /* create and initialize a linked list */ 3311 nlnk = N+1; 3312 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3313 3314 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3315 len = 0; 3316 len = ai[owners[rank+1]] - ai[owners[rank]]; 3317 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3318 current_space = free_space; 3319 3320 /* determine symbolic info for each local row */ 3321 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3322 nextrow = buf_ri_k + merge->nrecv; 3323 nextai = nextrow + merge->nrecv; 3324 for (k=0; k<merge->nrecv; k++){ 3325 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3326 nrows = *buf_ri_k[k]; 3327 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 3328 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3329 } 3330 3331 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3332 len = 0; 3333 for (i=0;i<m;i++) { 3334 bnzi = 0; 3335 /* add local non-zero cols of this proc's seqmat into lnk */ 3336 arow = owners[rank] + i; 3337 anzi = ai[arow+1] - ai[arow]; 3338 aj = a->j + ai[arow]; 3339 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3340 bnzi += nlnk; 3341 /* add received col data into lnk */ 3342 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3343 if (i == *nextrow[k]) { /* i-th row */ 3344 anzi = *(nextai[k]+1) - *nextai[k]; 3345 aj = buf_rj[k] + *nextai[k]; 3346 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3347 bnzi += nlnk; 3348 nextrow[k]++; nextai[k]++; 3349 } 3350 } 3351 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3352 3353 /* if free space is not available, make more free space */ 3354 if (current_space->local_remaining<bnzi) { 3355 ierr = PetscFreeSpaceGet(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3356 nspacedouble++; 3357 } 3358 /* copy data into free space, then initialize lnk */ 3359 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3360 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3361 3362 current_space->array += bnzi; 3363 current_space->local_used += bnzi; 3364 current_space->local_remaining -= bnzi; 3365 3366 bi[i+1] = bi[i] + bnzi; 3367 } 3368 3369 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3370 3371 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3372 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3373 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3374 3375 /* create symbolic parallel matrix B_mpi */ 3376 /*---------------------------------------*/ 3377 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 3378 if (n==PETSC_DECIDE) { 3379 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 3380 } else { 3381 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3382 } 3383 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3384 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3385 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3386 3387 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3388 B_mpi->assembled = PETSC_FALSE; 3389 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3390 merge->bi = bi; 3391 merge->bj = bj; 3392 merge->buf_ri = buf_ri; 3393 merge->buf_rj = buf_rj; 3394 merge->coi = PETSC_NULL; 3395 merge->coj = PETSC_NULL; 3396 merge->owners_co = PETSC_NULL; 3397 3398 /* attach the supporting struct to B_mpi for reuse */ 3399 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3400 ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr); 3401 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3402 *mpimat = B_mpi; 3403 3404 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 3405 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3406 PetscFunctionReturn(0); 3407 } 3408 3409 static PetscEvent logkey_seqstompi = 0; 3410 #undef __FUNCT__ 3411 #define __FUNCT__ "MatMerge_SeqsToMPI" 3412 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3413 { 3414 PetscErrorCode ierr; 3415 3416 PetscFunctionBegin; 3417 if (!logkey_seqstompi) { 3418 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3419 } 3420 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3421 if (scall == MAT_INITIAL_MATRIX){ 3422 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3423 } 3424 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3425 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3426 PetscFunctionReturn(0); 3427 } 3428 static PetscEvent logkey_getlocalmat = 0; 3429 #undef __FUNCT__ 3430 #define __FUNCT__ "MatGetLocalMat" 3431 /*@C 3432 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3433 3434 Not Collective 3435 3436 Input Parameters: 3437 + A - the matrix 3438 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3439 3440 Output Parameter: 3441 . A_loc - the local sequential matrix generated 3442 3443 Level: developer 3444 3445 @*/ 3446 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3447 { 3448 PetscErrorCode ierr; 3449 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3450 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3451 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3452 PetscScalar *aa=a->a,*ba=b->a,*ca; 3453 PetscInt am=A->rmap.n,i,j,k,cstart=A->cmap.rstart; 3454 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3455 3456 PetscFunctionBegin; 3457 if (!logkey_getlocalmat) { 3458 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3459 } 3460 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3461 if (scall == MAT_INITIAL_MATRIX){ 3462 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 3463 ci[0] = 0; 3464 for (i=0; i<am; i++){ 3465 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3466 } 3467 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3468 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 3469 k = 0; 3470 for (i=0; i<am; i++) { 3471 ncols_o = bi[i+1] - bi[i]; 3472 ncols_d = ai[i+1] - ai[i]; 3473 /* off-diagonal portion of A */ 3474 for (jo=0; jo<ncols_o; jo++) { 3475 col = cmap[*bj]; 3476 if (col >= cstart) break; 3477 cj[k] = col; bj++; 3478 ca[k++] = *ba++; 3479 } 3480 /* diagonal portion of A */ 3481 for (j=0; j<ncols_d; j++) { 3482 cj[k] = cstart + *aj++; 3483 ca[k++] = *aa++; 3484 } 3485 /* off-diagonal portion of A */ 3486 for (j=jo; j<ncols_o; j++) { 3487 cj[k] = cmap[*bj++]; 3488 ca[k++] = *ba++; 3489 } 3490 } 3491 /* put together the new matrix */ 3492 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr); 3493 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3494 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3495 mat = (Mat_SeqAIJ*)(*A_loc)->data; 3496 mat->freedata = PETSC_TRUE; 3497 mat->nonew = 0; 3498 } else if (scall == MAT_REUSE_MATRIX){ 3499 mat=(Mat_SeqAIJ*)(*A_loc)->data; 3500 ci = mat->i; cj = mat->j; ca = mat->a; 3501 for (i=0; i<am; i++) { 3502 /* off-diagonal portion of A */ 3503 ncols_o = bi[i+1] - bi[i]; 3504 for (jo=0; jo<ncols_o; jo++) { 3505 col = cmap[*bj]; 3506 if (col >= cstart) break; 3507 *ca++ = *ba++; bj++; 3508 } 3509 /* diagonal portion of A */ 3510 ncols_d = ai[i+1] - ai[i]; 3511 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 3512 /* off-diagonal portion of A */ 3513 for (j=jo; j<ncols_o; j++) { 3514 *ca++ = *ba++; bj++; 3515 } 3516 } 3517 } else { 3518 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3519 } 3520 3521 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3522 PetscFunctionReturn(0); 3523 } 3524 3525 static PetscEvent logkey_getlocalmatcondensed = 0; 3526 #undef __FUNCT__ 3527 #define __FUNCT__ "MatGetLocalMatCondensed" 3528 /*@C 3529 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 3530 3531 Not Collective 3532 3533 Input Parameters: 3534 + A - the matrix 3535 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3536 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 3537 3538 Output Parameter: 3539 . A_loc - the local sequential matrix generated 3540 3541 Level: developer 3542 3543 @*/ 3544 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 3545 { 3546 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3547 PetscErrorCode ierr; 3548 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 3549 IS isrowa,iscola; 3550 Mat *aloc; 3551 3552 PetscFunctionBegin; 3553 if (!logkey_getlocalmatcondensed) { 3554 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE); 3555 } 3556 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3557 if (!row){ 3558 start = A->rmap.rstart; end = A->rmap.rend; 3559 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 3560 } else { 3561 isrowa = *row; 3562 } 3563 if (!col){ 3564 start = A->cmap.rstart; 3565 cmap = a->garray; 3566 nzA = a->A->cmap.n; 3567 nzB = a->B->cmap.n; 3568 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3569 ncols = 0; 3570 for (i=0; i<nzB; i++) { 3571 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3572 else break; 3573 } 3574 imark = i; 3575 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 3576 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 3577 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 3578 ierr = PetscFree(idx);CHKERRQ(ierr); 3579 } else { 3580 iscola = *col; 3581 } 3582 if (scall != MAT_INITIAL_MATRIX){ 3583 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 3584 aloc[0] = *A_loc; 3585 } 3586 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 3587 *A_loc = aloc[0]; 3588 ierr = PetscFree(aloc);CHKERRQ(ierr); 3589 if (!row){ 3590 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 3591 } 3592 if (!col){ 3593 ierr = ISDestroy(iscola);CHKERRQ(ierr); 3594 } 3595 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3596 PetscFunctionReturn(0); 3597 } 3598 3599 static PetscEvent logkey_GetBrowsOfAcols = 0; 3600 #undef __FUNCT__ 3601 #define __FUNCT__ "MatGetBrowsOfAcols" 3602 /*@C 3603 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 3604 3605 Collective on Mat 3606 3607 Input Parameters: 3608 + A,B - the matrices in mpiaij format 3609 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3610 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 3611 3612 Output Parameter: 3613 + rowb, colb - index sets of rows and columns of B to extract 3614 . brstart - row index of B_seq from which next B->rmap.n rows are taken from B's local rows 3615 - B_seq - the sequential matrix generated 3616 3617 Level: developer 3618 3619 @*/ 3620 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 3621 { 3622 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3623 PetscErrorCode ierr; 3624 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 3625 IS isrowb,iscolb; 3626 Mat *bseq; 3627 3628 PetscFunctionBegin; 3629 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 3630 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 3631 } 3632 if (!logkey_GetBrowsOfAcols) { 3633 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 3634 } 3635 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3636 3637 if (scall == MAT_INITIAL_MATRIX){ 3638 start = A->cmap.rstart; 3639 cmap = a->garray; 3640 nzA = a->A->cmap.n; 3641 nzB = a->B->cmap.n; 3642 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3643 ncols = 0; 3644 for (i=0; i<nzB; i++) { /* row < local row index */ 3645 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3646 else break; 3647 } 3648 imark = i; 3649 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 3650 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 3651 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 3652 ierr = PetscFree(idx);CHKERRQ(ierr); 3653 *brstart = imark; 3654 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr); 3655 } else { 3656 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 3657 isrowb = *rowb; iscolb = *colb; 3658 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 3659 bseq[0] = *B_seq; 3660 } 3661 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 3662 *B_seq = bseq[0]; 3663 ierr = PetscFree(bseq);CHKERRQ(ierr); 3664 if (!rowb){ 3665 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 3666 } else { 3667 *rowb = isrowb; 3668 } 3669 if (!colb){ 3670 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 3671 } else { 3672 *colb = iscolb; 3673 } 3674 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3675 PetscFunctionReturn(0); 3676 } 3677 3678 static PetscEvent logkey_GetBrowsOfAocols = 0; 3679 #undef __FUNCT__ 3680 #define __FUNCT__ "MatGetBrowsOfAoCols" 3681 /*@C 3682 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 3683 of the OFF-DIAGONAL portion of local A 3684 3685 Collective on Mat 3686 3687 Input Parameters: 3688 + A,B - the matrices in mpiaij format 3689 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3690 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 3691 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 3692 3693 Output Parameter: 3694 + B_oth - the sequential matrix generated 3695 3696 Level: developer 3697 3698 @*/ 3699 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 3700 { 3701 VecScatter_MPI_General *gen_to,*gen_from; 3702 PetscErrorCode ierr; 3703 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3704 Mat_SeqAIJ *b_oth; 3705 VecScatter ctx=a->Mvctx; 3706 MPI_Comm comm=ctx->comm; 3707 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank; 3708 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj; 3709 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 3710 PetscInt i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 3711 MPI_Request *rwaits,*swaits; 3712 MPI_Status *sstatus,rstatus; 3713 PetscInt *cols; 3714 PetscScalar *vals; 3715 PetscMPIInt j; 3716 3717 PetscFunctionBegin; 3718 if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){ 3719 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend); 3720 } 3721 if (!logkey_GetBrowsOfAocols) { 3722 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE); 3723 } 3724 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3725 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3726 3727 gen_to = (VecScatter_MPI_General*)ctx->todata; 3728 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 3729 rvalues = gen_from->values; /* holds the length of sending row */ 3730 svalues = gen_to->values; /* holds the length of receiving row */ 3731 nrecvs = gen_from->n; 3732 nsends = gen_to->n; 3733 3734 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 3735 srow = gen_to->indices; /* local row index to be sent */ 3736 rstarts = gen_from->starts; 3737 sstarts = gen_to->starts; 3738 rprocs = gen_from->procs; 3739 sprocs = gen_to->procs; 3740 sstatus = gen_to->sstatus; 3741 3742 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 3743 if (scall == MAT_INITIAL_MATRIX){ 3744 /* i-array */ 3745 /*---------*/ 3746 /* post receives */ 3747 for (i=0; i<nrecvs; i++){ 3748 rowlen = (PetscInt*)rvalues + rstarts[i]; 3749 nrows = rstarts[i+1]-rstarts[i]; 3750 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3751 } 3752 3753 /* pack the outgoing message */ 3754 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 3755 rstartsj = sstartsj + nsends +1; 3756 sstartsj[0] = 0; rstartsj[0] = 0; 3757 len = 0; /* total length of j or a array to be sent */ 3758 k = 0; 3759 for (i=0; i<nsends; i++){ 3760 rowlen = (PetscInt*)svalues + sstarts[i]; 3761 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3762 for (j=0; j<nrows; j++) { 3763 row = srow[k] + B->rmap.range[rank]; /* global row idx */ 3764 ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 3765 len += rowlen[j]; 3766 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 3767 k++; 3768 } 3769 ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3770 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 3771 } 3772 /* recvs and sends of i-array are completed */ 3773 i = nrecvs; 3774 while (i--) { 3775 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3776 } 3777 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3778 /* allocate buffers for sending j and a arrays */ 3779 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 3780 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 3781 3782 /* create i-array of B_oth */ 3783 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 3784 b_othi[0] = 0; 3785 len = 0; /* total length of j or a array to be received */ 3786 k = 0; 3787 for (i=0; i<nrecvs; i++){ 3788 rowlen = (PetscInt*)rvalues + rstarts[i]; 3789 nrows = rstarts[i+1]-rstarts[i]; 3790 for (j=0; j<nrows; j++) { 3791 b_othi[k+1] = b_othi[k] + rowlen[j]; 3792 len += rowlen[j]; k++; 3793 } 3794 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 3795 } 3796 3797 /* allocate space for j and a arrrays of B_oth */ 3798 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 3799 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr); 3800 3801 /* j-array */ 3802 /*---------*/ 3803 /* post receives of j-array */ 3804 for (i=0; i<nrecvs; i++){ 3805 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3806 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3807 } 3808 k = 0; 3809 for (i=0; i<nsends; i++){ 3810 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3811 bufJ = bufj+sstartsj[i]; 3812 for (j=0; j<nrows; j++) { 3813 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 3814 ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3815 for (l=0; l<ncols; l++){ 3816 *bufJ++ = cols[l]; 3817 } 3818 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3819 } 3820 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3821 } 3822 3823 /* recvs and sends of j-array are completed */ 3824 i = nrecvs; 3825 while (i--) { 3826 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3827 } 3828 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3829 } else if (scall == MAT_REUSE_MATRIX){ 3830 sstartsj = *startsj; 3831 rstartsj = sstartsj + nsends +1; 3832 bufa = *bufa_ptr; 3833 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 3834 b_otha = b_oth->a; 3835 } else { 3836 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 3837 } 3838 3839 /* a-array */ 3840 /*---------*/ 3841 /* post receives of a-array */ 3842 for (i=0; i<nrecvs; i++){ 3843 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3844 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3845 } 3846 k = 0; 3847 for (i=0; i<nsends; i++){ 3848 nrows = sstarts[i+1]-sstarts[i]; 3849 bufA = bufa+sstartsj[i]; 3850 for (j=0; j<nrows; j++) { 3851 row = srow[k++] + B->rmap.range[rank]; /* global row idx */ 3852 ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3853 for (l=0; l<ncols; l++){ 3854 *bufA++ = vals[l]; 3855 } 3856 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3857 3858 } 3859 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3860 } 3861 /* recvs and sends of a-array are completed */ 3862 i = nrecvs; 3863 while (i--) { 3864 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3865 } 3866 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3867 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 3868 3869 if (scall == MAT_INITIAL_MATRIX){ 3870 /* put together the new matrix */ 3871 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 3872 3873 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3874 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3875 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 3876 b_oth->freedata = PETSC_TRUE; 3877 b_oth->nonew = 0; 3878 3879 ierr = PetscFree(bufj);CHKERRQ(ierr); 3880 if (!startsj || !bufa_ptr){ 3881 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 3882 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 3883 } else { 3884 *startsj = sstartsj; 3885 *bufa_ptr = bufa; 3886 } 3887 } 3888 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3889 3890 PetscFunctionReturn(0); 3891 } 3892 3893 #undef __FUNCT__ 3894 #define __FUNCT__ "MatGetCommunicationStructs" 3895 /*@C 3896 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 3897 3898 Not Collective 3899 3900 Input Parameters: 3901 . A - The matrix in mpiaij format 3902 3903 Output Parameter: 3904 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 3905 . colmap - A map from global column index to local index into lvec 3906 - multScatter - A scatter from the argument of a matrix-vector product to lvec 3907 3908 Level: developer 3909 3910 @*/ 3911 #if defined (PETSC_USE_CTABLE) 3912 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 3913 #else 3914 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 3915 #endif 3916 { 3917 Mat_MPIAIJ *a; 3918 3919 PetscFunctionBegin; 3920 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 3921 PetscValidPointer(lvec, 2) 3922 PetscValidPointer(colmap, 3) 3923 PetscValidPointer(multScatter, 4) 3924 a = (Mat_MPIAIJ *) A->data; 3925 if (lvec) *lvec = a->lvec; 3926 if (colmap) *colmap = a->colmap; 3927 if (multScatter) *multScatter = a->Mvctx; 3928 PetscFunctionReturn(0); 3929 } 3930 3931 /*MC 3932 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 3933 3934 Options Database Keys: 3935 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 3936 3937 Level: beginner 3938 3939 .seealso: MatCreateMPIAIJ 3940 M*/ 3941 3942 EXTERN_C_BEGIN 3943 #undef __FUNCT__ 3944 #define __FUNCT__ "MatCreate_MPIAIJ" 3945 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 3946 { 3947 Mat_MPIAIJ *b; 3948 PetscErrorCode ierr; 3949 PetscMPIInt size; 3950 3951 PetscFunctionBegin; 3952 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 3953 3954 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 3955 B->data = (void*)b; 3956 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3957 B->factor = 0; 3958 B->rmap.bs = 1; 3959 B->assembled = PETSC_FALSE; 3960 B->mapping = 0; 3961 3962 B->insertmode = NOT_SET_VALUES; 3963 b->size = size; 3964 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 3965 3966 /* build cache for off array entries formed */ 3967 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 3968 b->donotstash = PETSC_FALSE; 3969 b->colmap = 0; 3970 b->garray = 0; 3971 b->roworiented = PETSC_TRUE; 3972 3973 /* stuff used for matrix vector multiply */ 3974 b->lvec = PETSC_NULL; 3975 b->Mvctx = PETSC_NULL; 3976 3977 /* stuff for MatGetRow() */ 3978 b->rowindices = 0; 3979 b->rowvalues = 0; 3980 b->getrowactive = PETSC_FALSE; 3981 3982 3983 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3984 "MatStoreValues_MPIAIJ", 3985 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 3986 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3987 "MatRetrieveValues_MPIAIJ", 3988 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 3989 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3990 "MatGetDiagonalBlock_MPIAIJ", 3991 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 3992 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3993 "MatIsTranspose_MPIAIJ", 3994 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 3995 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 3996 "MatMPIAIJSetPreallocation_MPIAIJ", 3997 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 3998 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 3999 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 4000 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 4001 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 4002 "MatDiagonalScaleLocal_MPIAIJ", 4003 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 4004 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpiaij_mpidense_C", 4005 "MatMatMult_MPIAIJ_MPIDense", 4006 MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr); 4007 PetscFunctionReturn(0); 4008 } 4009 EXTERN_C_END 4010 4011 /* 4012 Special version for direct calls from Fortran 4013 */ 4014 #if defined(PETSC_HAVE_FORTRAN_CAPS) 4015 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 4016 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 4017 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 4018 #endif 4019 4020 /* Change these macros so can be used in void function */ 4021 #undef CHKERRQ 4022 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr) 4023 #undef SETERRQ2 4024 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr) 4025 #undef SETERRQ 4026 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr) 4027 4028 EXTERN_C_BEGIN 4029 #undef __FUNCT__ 4030 #define __FUNCT__ "matsetvaluesmpiaij_" 4031 void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv) 4032 { 4033 Mat mat = *mmat; 4034 PetscInt m = *mm, n = *mn; 4035 InsertMode addv = *maddv; 4036 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 4037 PetscScalar value; 4038 PetscErrorCode ierr; 4039 4040 MatPreallocated(mat); 4041 if (mat->insertmode == NOT_SET_VALUES) { 4042 mat->insertmode = addv; 4043 } 4044 #if defined(PETSC_USE_DEBUG) 4045 else if (mat->insertmode != addv) { 4046 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 4047 } 4048 #endif 4049 { 4050 PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend; 4051 PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col; 4052 PetscTruth roworiented = aij->roworiented; 4053 4054 /* Some Variables required in the macro */ 4055 Mat A = aij->A; 4056 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 4057 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 4058 PetscScalar *aa = a->a; 4059 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 4060 Mat B = aij->B; 4061 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 4062 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n; 4063 PetscScalar *ba = b->a; 4064 4065 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 4066 PetscInt nonew = a->nonew; 4067 PetscScalar *ap1,*ap2; 4068 4069 PetscFunctionBegin; 4070 for (i=0; i<m; i++) { 4071 if (im[i] < 0) continue; 4072 #if defined(PETSC_USE_DEBUG) 4073 if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1); 4074 #endif 4075 if (im[i] >= rstart && im[i] < rend) { 4076 row = im[i] - rstart; 4077 lastcol1 = -1; 4078 rp1 = aj + ai[row]; 4079 ap1 = aa + ai[row]; 4080 rmax1 = aimax[row]; 4081 nrow1 = ailen[row]; 4082 low1 = 0; 4083 high1 = nrow1; 4084 lastcol2 = -1; 4085 rp2 = bj + bi[row]; 4086 ap2 = ba + bi[row]; 4087 rmax2 = bimax[row]; 4088 nrow2 = bilen[row]; 4089 low2 = 0; 4090 high2 = nrow2; 4091 4092 for (j=0; j<n; j++) { 4093 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 4094 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 4095 if (in[j] >= cstart && in[j] < cend){ 4096 col = in[j] - cstart; 4097 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 4098 } else if (in[j] < 0) continue; 4099 #if defined(PETSC_USE_DEBUG) 4100 else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);} 4101 #endif 4102 else { 4103 if (mat->was_assembled) { 4104 if (!aij->colmap) { 4105 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 4106 } 4107 #if defined (PETSC_USE_CTABLE) 4108 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4109 col--; 4110 #else 4111 col = aij->colmap[in[j]] - 1; 4112 #endif 4113 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 4114 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 4115 col = in[j]; 4116 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 4117 B = aij->B; 4118 b = (Mat_SeqAIJ*)B->data; 4119 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 4120 rp2 = bj + bi[row]; 4121 ap2 = ba + bi[row]; 4122 rmax2 = bimax[row]; 4123 nrow2 = bilen[row]; 4124 low2 = 0; 4125 high2 = nrow2; 4126 bm = aij->B->rmap.n; 4127 ba = b->a; 4128 } 4129 } else col = in[j]; 4130 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 4131 } 4132 } 4133 } else { 4134 if (!aij->donotstash) { 4135 if (roworiented) { 4136 if (ignorezeroentries && v[i*n] == 0.0) continue; 4137 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 4138 } else { 4139 if (ignorezeroentries && v[i] == 0.0) continue; 4140 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 4141 } 4142 } 4143 } 4144 }} 4145 PetscFunctionReturnVoid(); 4146 } 4147 EXTERN_C_END 4148