1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 4 #include "src/inline/spops.h" 5 6 /* 7 Local utility routine that creates a mapping from the global column 8 number to the local number in the off-diagonal part of the local 9 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 10 a slightly higher hash table cost; without it it is not scalable (each processor 11 has an order N integer array but is fast to acess. 12 */ 13 #undef __FUNCT__ 14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 16 { 17 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 18 PetscErrorCode ierr; 19 PetscInt n = aij->B->n,i; 20 21 PetscFunctionBegin; 22 #if defined (PETSC_USE_CTABLE) 23 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 24 for (i=0; i<n; i++){ 25 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 26 } 27 #else 28 ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 29 ierr = PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));CHKERRQ(ierr); 30 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr); 31 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 32 #endif 33 PetscFunctionReturn(0); 34 } 35 36 37 #define CHUNKSIZE 15 38 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 39 { \ 40 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 41 lastcol1 = col;\ 42 while (high1-low1 > 5) { \ 43 t = (low1+high1)/2; \ 44 if (rp1[t] > col) high1 = t; \ 45 else low1 = t; \ 46 } \ 47 for (_i=low1; _i<high1; _i++) { \ 48 if (rp1[_i] > col) break; \ 49 if (rp1[_i] == col) { \ 50 if (addv == ADD_VALUES) ap1[_i] += value; \ 51 else ap1[_i] = value; \ 52 goto a_noinsert; \ 53 } \ 54 } \ 55 if (value == 0.0 && ignorezeroentries) goto a_noinsert; \ 56 if (nonew == 1) goto a_noinsert; \ 57 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 58 MatSeqXAIJReallocateAIJ(a,1,nrow1,row,col,rmax1,aa,ai,aj,am,rp1,ap1,aimax,nonew); \ 59 N = nrow1++ - 1; a->nz++; high1++; \ 60 /* shift up all the later entries in this row */ \ 61 for (ii=N; ii>=_i; ii--) { \ 62 rp1[ii+1] = rp1[ii]; \ 63 ap1[ii+1] = ap1[ii]; \ 64 } \ 65 rp1[_i] = col; \ 66 ap1[_i] = value; \ 67 a_noinsert: ; \ 68 ailen[row] = nrow1; \ 69 } 70 71 72 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 73 { \ 74 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 75 lastcol2 = col;\ 76 while (high2-low2 > 5) { \ 77 t = (low2+high2)/2; \ 78 if (rp2[t] > col) high2 = t; \ 79 else low2 = t; \ 80 } \ 81 for (_i=low2; _i<high2; _i++) { \ 82 if (rp2[_i] > col) break; \ 83 if (rp2[_i] == col) { \ 84 if (addv == ADD_VALUES) ap2[_i] += value; \ 85 else ap2[_i] = value; \ 86 goto b_noinsert; \ 87 } \ 88 } \ 89 if (value == 0.0 && ignorezeroentries) goto b_noinsert; \ 90 if (nonew == 1) goto b_noinsert; \ 91 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 92 MatSeqXAIJReallocateAIJ(b,1,nrow2,row,col,rmax2,ba,bi,bj,bm,rp2,ap2,bimax,nonew); \ 93 N = nrow2++ - 1; b->nz++; high2++;\ 94 /* shift up all the later entries in this row */ \ 95 for (ii=N; ii>=_i; ii--) { \ 96 rp2[ii+1] = rp2[ii]; \ 97 ap2[ii+1] = ap2[ii]; \ 98 } \ 99 rp2[_i] = col; \ 100 ap2[_i] = value; \ 101 b_noinsert: ; \ 102 bilen[row] = nrow2; \ 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 107 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 108 { 109 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 110 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 111 PetscErrorCode ierr; 112 PetscInt l,*garray = mat->garray,diag; 113 114 PetscFunctionBegin; 115 /* code only works for square matrices A */ 116 117 /* find size of row to the left of the diagonal part */ 118 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 119 row = row - diag; 120 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 121 if (garray[b->j[b->i[row]+l]] > diag) break; 122 } 123 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 124 125 /* diagonal part */ 126 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 127 128 /* right of diagonal part */ 129 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 130 PetscFunctionReturn(0); 131 } 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "MatSetValues_MPIAIJ" 135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 136 { 137 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 138 PetscScalar value; 139 PetscErrorCode ierr; 140 PetscInt i,j,rstart = aij->rstart,rend = aij->rend; 141 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 142 PetscTruth roworiented = aij->roworiented; 143 144 /* Some Variables required in the macro */ 145 Mat A = aij->A; 146 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 147 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 148 PetscScalar *aa = a->a; 149 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 150 Mat B = aij->B; 151 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 152 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 153 PetscScalar *ba = b->a; 154 155 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 156 PetscInt nonew = a->nonew; 157 PetscScalar *ap1,*ap2; 158 159 PetscFunctionBegin; 160 for (i=0; i<m; i++) { 161 if (im[i] < 0) continue; 162 #if defined(PETSC_USE_DEBUG) 163 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 164 #endif 165 if (im[i] >= rstart && im[i] < rend) { 166 row = im[i] - rstart; 167 lastcol1 = -1; 168 rp1 = aj + ai[row]; 169 ap1 = aa + ai[row]; 170 rmax1 = aimax[row]; 171 nrow1 = ailen[row]; 172 low1 = 0; 173 high1 = nrow1; 174 lastcol2 = -1; 175 rp2 = bj + bi[row]; 176 ap2 = ba + bi[row]; 177 rmax2 = bimax[row]; 178 nrow2 = bilen[row]; 179 low2 = 0; 180 high2 = nrow2; 181 182 for (j=0; j<n; j++) { 183 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 184 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 185 if (in[j] >= cstart && in[j] < cend){ 186 col = in[j] - cstart; 187 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 188 } else if (in[j] < 0) continue; 189 #if defined(PETSC_USE_DEBUG) 190 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);} 191 #endif 192 else { 193 if (mat->was_assembled) { 194 if (!aij->colmap) { 195 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 196 } 197 #if defined (PETSC_USE_CTABLE) 198 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 199 col--; 200 #else 201 col = aij->colmap[in[j]] - 1; 202 #endif 203 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 204 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 205 col = in[j]; 206 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 207 B = aij->B; 208 b = (Mat_SeqAIJ*)B->data; 209 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 210 rp2 = bj + bi[row]; 211 ap2 = ba + bi[row]; 212 rmax2 = bimax[row]; 213 nrow2 = bilen[row]; 214 low2 = 0; 215 high2 = nrow2; 216 bm = aij->B->m; 217 ba = b->a; 218 } 219 } else col = in[j]; 220 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 221 } 222 } 223 } else { 224 if (!aij->donotstash) { 225 if (roworiented) { 226 if (ignorezeroentries && v[i*n] == 0.0) continue; 227 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 228 } else { 229 if (ignorezeroentries && v[i] == 0.0) continue; 230 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 231 } 232 } 233 } 234 } 235 PetscFunctionReturn(0); 236 } 237 238 239 #undef __FUNCT__ 240 #define __FUNCT__ "MatGetValues_MPIAIJ" 241 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 242 { 243 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 244 PetscErrorCode ierr; 245 PetscInt i,j,rstart = aij->rstart,rend = aij->rend; 246 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 247 248 PetscFunctionBegin; 249 for (i=0; i<m; i++) { 250 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 251 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1); 252 if (idxm[i] >= rstart && idxm[i] < rend) { 253 row = idxm[i] - rstart; 254 for (j=0; j<n; j++) { 255 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 256 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1); 257 if (idxn[j] >= cstart && idxn[j] < cend){ 258 col = idxn[j] - cstart; 259 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 260 } else { 261 if (!aij->colmap) { 262 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 263 } 264 #if defined (PETSC_USE_CTABLE) 265 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 266 col --; 267 #else 268 col = aij->colmap[idxn[j]] - 1; 269 #endif 270 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 271 else { 272 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 273 } 274 } 275 } 276 } else { 277 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 278 } 279 } 280 PetscFunctionReturn(0); 281 } 282 283 #undef __FUNCT__ 284 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 285 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 286 { 287 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 288 PetscErrorCode ierr; 289 PetscInt nstash,reallocs; 290 InsertMode addv; 291 292 PetscFunctionBegin; 293 if (aij->donotstash) { 294 PetscFunctionReturn(0); 295 } 296 297 /* make sure all processors are either in INSERTMODE or ADDMODE */ 298 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 299 if (addv == (ADD_VALUES|INSERT_VALUES)) { 300 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 301 } 302 mat->insertmode = addv; /* in case this processor had no cache */ 303 304 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 305 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 306 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 307 PetscFunctionReturn(0); 308 } 309 310 #undef __FUNCT__ 311 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 312 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 313 { 314 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 315 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 316 PetscErrorCode ierr; 317 PetscMPIInt n; 318 PetscInt i,j,rstart,ncols,flg; 319 PetscInt *row,*col,other_disassembled; 320 PetscScalar *val; 321 InsertMode addv = mat->insertmode; 322 323 PetscFunctionBegin; 324 if (!aij->donotstash) { 325 while (1) { 326 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 327 if (!flg) break; 328 329 for (i=0; i<n;) { 330 /* Now identify the consecutive vals belonging to the same row */ 331 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 332 if (j < n) ncols = j-i; 333 else ncols = n-i; 334 /* Now assemble all these values with a single function call */ 335 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 336 i = j; 337 } 338 } 339 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 340 } 341 a->compressedrow.use = PETSC_FALSE; 342 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 343 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 344 345 /* determine if any processor has disassembled, if so we must 346 also disassemble ourselfs, in order that we may reassemble. */ 347 /* 348 if nonzero structure of submatrix B cannot change then we know that 349 no processor disassembled thus we can skip this stuff 350 */ 351 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 352 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 353 if (mat->was_assembled && !other_disassembled) { 354 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 355 } 356 } 357 /* reaccess the b because aij->B was changed in MatSetValues() or DisAssemble() */ 358 b = (Mat_SeqAIJ *)aij->B->data; 359 360 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 361 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 362 } 363 ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr); 364 b->compressedrow.use = PETSC_TRUE; 365 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 366 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 367 368 if (aij->rowvalues) { 369 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 370 aij->rowvalues = 0; 371 } 372 373 /* used by MatAXPY() */ 374 a->xtoy = 0; b->xtoy = 0; 375 a->XtoY = 0; b->XtoY = 0; 376 377 PetscFunctionReturn(0); 378 } 379 380 #undef __FUNCT__ 381 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 382 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 383 { 384 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 385 PetscErrorCode ierr; 386 387 PetscFunctionBegin; 388 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 389 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 390 PetscFunctionReturn(0); 391 } 392 393 #undef __FUNCT__ 394 #define __FUNCT__ "MatZeroRows_MPIAIJ" 395 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 396 { 397 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 398 PetscErrorCode ierr; 399 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1; 400 PetscInt i,*owners = l->rowners; 401 PetscInt *nprocs,j,idx,nsends,row; 402 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 403 PetscInt *rvalues,count,base,slen,*source; 404 PetscInt *lens,*lrows,*values,rstart=l->rstart; 405 MPI_Comm comm = A->comm; 406 MPI_Request *send_waits,*recv_waits; 407 MPI_Status recv_status,*send_status; 408 #if defined(PETSC_DEBUG) 409 PetscTruth found = PETSC_FALSE; 410 #endif 411 412 PetscFunctionBegin; 413 /* first count number of contributors to each processor */ 414 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 415 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 416 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 417 j = 0; 418 for (i=0; i<N; i++) { 419 if (lastidx > (idx = rows[i])) j = 0; 420 lastidx = idx; 421 for (; j<size; j++) { 422 if (idx >= owners[j] && idx < owners[j+1]) { 423 nprocs[2*j]++; 424 nprocs[2*j+1] = 1; 425 owner[i] = j; 426 #if defined(PETSC_DEBUG) 427 found = PETSC_TRUE; 428 #endif 429 break; 430 } 431 } 432 #if defined(PETSC_DEBUG) 433 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 434 found = PETSC_FALSE; 435 #endif 436 } 437 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 438 439 /* inform other processors of number of messages and max length*/ 440 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 441 442 /* post receives: */ 443 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 444 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 445 for (i=0; i<nrecvs; i++) { 446 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 447 } 448 449 /* do sends: 450 1) starts[i] gives the starting index in svalues for stuff going to 451 the ith processor 452 */ 453 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 454 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 455 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 456 starts[0] = 0; 457 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 458 for (i=0; i<N; i++) { 459 svalues[starts[owner[i]]++] = rows[i]; 460 } 461 462 starts[0] = 0; 463 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 464 count = 0; 465 for (i=0; i<size; i++) { 466 if (nprocs[2*i+1]) { 467 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 468 } 469 } 470 ierr = PetscFree(starts);CHKERRQ(ierr); 471 472 base = owners[rank]; 473 474 /* wait on receives */ 475 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 476 source = lens + nrecvs; 477 count = nrecvs; slen = 0; 478 while (count) { 479 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 480 /* unpack receives into our local space */ 481 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 482 source[imdex] = recv_status.MPI_SOURCE; 483 lens[imdex] = n; 484 slen += n; 485 count--; 486 } 487 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 488 489 /* move the data into the send scatter */ 490 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 491 count = 0; 492 for (i=0; i<nrecvs; i++) { 493 values = rvalues + i*nmax; 494 for (j=0; j<lens[i]; j++) { 495 lrows[count++] = values[j] - base; 496 } 497 } 498 ierr = PetscFree(rvalues);CHKERRQ(ierr); 499 ierr = PetscFree(lens);CHKERRQ(ierr); 500 ierr = PetscFree(owner);CHKERRQ(ierr); 501 ierr = PetscFree(nprocs);CHKERRQ(ierr); 502 503 /* actually zap the local rows */ 504 /* 505 Zero the required rows. If the "diagonal block" of the matrix 506 is square and the user wishes to set the diagonal we use separate 507 code so that MatSetValues() is not called for each diagonal allocating 508 new memory, thus calling lots of mallocs and slowing things down. 509 510 Contributed by: Matthew Knepley 511 */ 512 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 513 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 514 if ((diag != 0.0) && (l->A->M == l->A->N)) { 515 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 516 } else if (diag != 0.0) { 517 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 518 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 519 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 520 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 521 } 522 for (i = 0; i < slen; i++) { 523 row = lrows[i] + rstart; 524 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 525 } 526 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 527 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 528 } else { 529 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 530 } 531 ierr = PetscFree(lrows);CHKERRQ(ierr); 532 533 /* wait on sends */ 534 if (nsends) { 535 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 536 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 537 ierr = PetscFree(send_status);CHKERRQ(ierr); 538 } 539 ierr = PetscFree(send_waits);CHKERRQ(ierr); 540 ierr = PetscFree(svalues);CHKERRQ(ierr); 541 542 PetscFunctionReturn(0); 543 } 544 545 #undef __FUNCT__ 546 #define __FUNCT__ "MatMult_MPIAIJ" 547 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 548 { 549 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 550 PetscErrorCode ierr; 551 PetscInt nt; 552 553 PetscFunctionBegin; 554 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 555 if (nt != A->n) { 556 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt); 557 } 558 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 559 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 560 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 561 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 562 PetscFunctionReturn(0); 563 } 564 565 #undef __FUNCT__ 566 #define __FUNCT__ "MatMultAdd_MPIAIJ" 567 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 568 { 569 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 570 PetscErrorCode ierr; 571 572 PetscFunctionBegin; 573 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 574 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 575 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 576 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 577 PetscFunctionReturn(0); 578 } 579 580 #undef __FUNCT__ 581 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 582 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 583 { 584 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 585 PetscErrorCode ierr; 586 PetscTruth merged; 587 588 PetscFunctionBegin; 589 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 590 /* do nondiagonal part */ 591 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 592 if (!merged) { 593 /* send it on its way */ 594 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 595 /* do local part */ 596 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 597 /* receive remote parts: note this assumes the values are not actually */ 598 /* added in yy until the next line, */ 599 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 600 } else { 601 /* do local part */ 602 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 603 /* send it on its way */ 604 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 605 /* values actually were received in the Begin() but we need to call this nop */ 606 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 607 } 608 PetscFunctionReturn(0); 609 } 610 611 EXTERN_C_BEGIN 612 #undef __FUNCT__ 613 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 614 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 615 { 616 MPI_Comm comm; 617 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 618 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 619 IS Me,Notme; 620 PetscErrorCode ierr; 621 PetscInt M,N,first,last,*notme,i; 622 PetscMPIInt size; 623 624 PetscFunctionBegin; 625 626 /* Easy test: symmetric diagonal block */ 627 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 628 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 629 if (!*f) PetscFunctionReturn(0); 630 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 631 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 632 if (size == 1) PetscFunctionReturn(0); 633 634 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 635 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 636 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 637 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 638 for (i=0; i<first; i++) notme[i] = i; 639 for (i=last; i<M; i++) notme[i-last+first] = i; 640 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 641 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 642 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 643 Aoff = Aoffs[0]; 644 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 645 Boff = Boffs[0]; 646 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 647 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 648 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 649 ierr = ISDestroy(Me);CHKERRQ(ierr); 650 ierr = ISDestroy(Notme);CHKERRQ(ierr); 651 652 PetscFunctionReturn(0); 653 } 654 EXTERN_C_END 655 656 #undef __FUNCT__ 657 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 658 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 659 { 660 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 661 PetscErrorCode ierr; 662 663 PetscFunctionBegin; 664 /* do nondiagonal part */ 665 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 666 /* send it on its way */ 667 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 668 /* do local part */ 669 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 670 /* receive remote parts */ 671 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 672 PetscFunctionReturn(0); 673 } 674 675 /* 676 This only works correctly for square matrices where the subblock A->A is the 677 diagonal block 678 */ 679 #undef __FUNCT__ 680 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 681 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 682 { 683 PetscErrorCode ierr; 684 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 685 686 PetscFunctionBegin; 687 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 688 if (a->rstart != a->cstart || a->rend != a->cend) { 689 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 690 } 691 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 692 PetscFunctionReturn(0); 693 } 694 695 #undef __FUNCT__ 696 #define __FUNCT__ "MatScale_MPIAIJ" 697 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 698 { 699 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 700 PetscErrorCode ierr; 701 702 PetscFunctionBegin; 703 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 704 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 705 PetscFunctionReturn(0); 706 } 707 708 #undef __FUNCT__ 709 #define __FUNCT__ "MatDestroy_MPIAIJ" 710 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 711 { 712 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 713 PetscErrorCode ierr; 714 715 PetscFunctionBegin; 716 #if defined(PETSC_USE_LOG) 717 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N); 718 #endif 719 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 720 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 721 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 722 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 723 #if defined (PETSC_USE_CTABLE) 724 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 725 #else 726 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 727 #endif 728 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 729 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 730 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 731 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 732 ierr = PetscFree(aij);CHKERRQ(ierr); 733 734 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 735 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 736 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 737 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 738 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 739 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 740 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 741 PetscFunctionReturn(0); 742 } 743 744 #undef __FUNCT__ 745 #define __FUNCT__ "MatView_MPIAIJ_Binary" 746 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 747 { 748 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 749 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 750 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 751 PetscErrorCode ierr; 752 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 753 int fd; 754 PetscInt nz,header[4],*row_lengths,*range,rlen,i; 755 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 756 PetscScalar *column_values; 757 758 PetscFunctionBegin; 759 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 760 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 761 nz = A->nz + B->nz; 762 if (!rank) { 763 header[0] = MAT_FILE_COOKIE; 764 header[1] = mat->M; 765 header[2] = mat->N; 766 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 767 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 768 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 769 /* get largest number of rows any processor has */ 770 rlen = mat->m; 771 ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr); 772 for (i=1; i<size; i++) { 773 rlen = PetscMax(rlen,range[i+1] - range[i]); 774 } 775 } else { 776 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 777 rlen = mat->m; 778 } 779 780 /* load up the local row counts */ 781 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 782 for (i=0; i<mat->m; i++) { 783 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 784 } 785 786 /* store the row lengths to the file */ 787 if (!rank) { 788 MPI_Status status; 789 ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 790 for (i=1; i<size; i++) { 791 rlen = range[i+1] - range[i]; 792 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 793 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 794 } 795 } else { 796 ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 797 } 798 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 799 800 /* load up the local column indices */ 801 nzmax = nz; /* )th processor needs space a largest processor needs */ 802 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 803 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 804 cnt = 0; 805 for (i=0; i<mat->m; i++) { 806 for (j=B->i[i]; j<B->i[i+1]; j++) { 807 if ( (col = garray[B->j[j]]) > cstart) break; 808 column_indices[cnt++] = col; 809 } 810 for (k=A->i[i]; k<A->i[i+1]; k++) { 811 column_indices[cnt++] = A->j[k] + cstart; 812 } 813 for (; j<B->i[i+1]; j++) { 814 column_indices[cnt++] = garray[B->j[j]]; 815 } 816 } 817 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 818 819 /* store the column indices to the file */ 820 if (!rank) { 821 MPI_Status status; 822 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 823 for (i=1; i<size; i++) { 824 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 825 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 826 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 827 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 828 } 829 } else { 830 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 831 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 832 } 833 ierr = PetscFree(column_indices);CHKERRQ(ierr); 834 835 /* load up the local column values */ 836 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 837 cnt = 0; 838 for (i=0; i<mat->m; i++) { 839 for (j=B->i[i]; j<B->i[i+1]; j++) { 840 if ( garray[B->j[j]] > cstart) break; 841 column_values[cnt++] = B->a[j]; 842 } 843 for (k=A->i[i]; k<A->i[i+1]; k++) { 844 column_values[cnt++] = A->a[k]; 845 } 846 for (; j<B->i[i+1]; j++) { 847 column_values[cnt++] = B->a[j]; 848 } 849 } 850 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 851 852 /* store the column values to the file */ 853 if (!rank) { 854 MPI_Status status; 855 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 856 for (i=1; i<size; i++) { 857 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 858 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 859 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 860 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 861 } 862 } else { 863 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 864 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 865 } 866 ierr = PetscFree(column_values);CHKERRQ(ierr); 867 PetscFunctionReturn(0); 868 } 869 870 #undef __FUNCT__ 871 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 872 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 873 { 874 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 875 PetscErrorCode ierr; 876 PetscMPIInt rank = aij->rank,size = aij->size; 877 PetscTruth isdraw,iascii,isbinary; 878 PetscViewer sviewer; 879 PetscViewerFormat format; 880 881 PetscFunctionBegin; 882 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 883 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 884 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 885 if (iascii) { 886 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 887 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 888 MatInfo info; 889 PetscTruth inodes; 890 891 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 892 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 893 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 894 if (!inodes) { 895 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 896 rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 897 } else { 898 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 899 rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 900 } 901 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 902 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 903 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 904 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 905 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 906 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 907 PetscFunctionReturn(0); 908 } else if (format == PETSC_VIEWER_ASCII_INFO) { 909 PetscInt inodecount,inodelimit,*inodes; 910 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 911 if (inodes) { 912 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 913 } else { 914 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 915 } 916 PetscFunctionReturn(0); 917 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 918 PetscFunctionReturn(0); 919 } 920 } else if (isbinary) { 921 if (size == 1) { 922 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 923 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 924 } else { 925 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 926 } 927 PetscFunctionReturn(0); 928 } else if (isdraw) { 929 PetscDraw draw; 930 PetscTruth isnull; 931 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 932 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 933 } 934 935 if (size == 1) { 936 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 937 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 938 } else { 939 /* assemble the entire matrix onto first processor. */ 940 Mat A; 941 Mat_SeqAIJ *Aloc; 942 PetscInt M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 943 PetscScalar *a; 944 945 ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 946 if (!rank) { 947 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 948 } else { 949 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 950 } 951 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 952 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 953 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 954 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 955 956 /* copy over the A part */ 957 Aloc = (Mat_SeqAIJ*)aij->A->data; 958 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 959 row = aij->rstart; 960 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 961 for (i=0; i<m; i++) { 962 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 963 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 964 } 965 aj = Aloc->j; 966 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 967 968 /* copy over the B part */ 969 Aloc = (Mat_SeqAIJ*)aij->B->data; 970 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 971 row = aij->rstart; 972 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 973 ct = cols; 974 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 975 for (i=0; i<m; i++) { 976 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 977 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 978 } 979 ierr = PetscFree(ct);CHKERRQ(ierr); 980 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 981 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 982 /* 983 Everyone has to call to draw the matrix since the graphics waits are 984 synchronized across all processors that share the PetscDraw object 985 */ 986 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 987 if (!rank) { 988 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 989 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 990 } 991 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 992 ierr = MatDestroy(A);CHKERRQ(ierr); 993 } 994 PetscFunctionReturn(0); 995 } 996 997 #undef __FUNCT__ 998 #define __FUNCT__ "MatView_MPIAIJ" 999 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1000 { 1001 PetscErrorCode ierr; 1002 PetscTruth iascii,isdraw,issocket,isbinary; 1003 1004 PetscFunctionBegin; 1005 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1006 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1007 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1008 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1009 if (iascii || isdraw || isbinary || issocket) { 1010 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1011 } else { 1012 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1013 } 1014 PetscFunctionReturn(0); 1015 } 1016 1017 1018 1019 #undef __FUNCT__ 1020 #define __FUNCT__ "MatRelax_MPIAIJ" 1021 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1022 { 1023 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1024 PetscErrorCode ierr; 1025 Vec bb1; 1026 1027 PetscFunctionBegin; 1028 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1029 1030 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1031 1032 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1033 if (flag & SOR_ZERO_INITIAL_GUESS) { 1034 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1035 its--; 1036 } 1037 1038 while (its--) { 1039 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1040 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1041 1042 /* update rhs: bb1 = bb - B*x */ 1043 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1044 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1045 1046 /* local sweep */ 1047 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1048 CHKERRQ(ierr); 1049 } 1050 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1051 if (flag & SOR_ZERO_INITIAL_GUESS) { 1052 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1053 its--; 1054 } 1055 while (its--) { 1056 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1057 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1058 1059 /* update rhs: bb1 = bb - B*x */ 1060 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1061 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1062 1063 /* local sweep */ 1064 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1065 CHKERRQ(ierr); 1066 } 1067 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1068 if (flag & SOR_ZERO_INITIAL_GUESS) { 1069 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1070 its--; 1071 } 1072 while (its--) { 1073 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1074 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1075 1076 /* update rhs: bb1 = bb - B*x */ 1077 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1078 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1079 1080 /* local sweep */ 1081 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1082 CHKERRQ(ierr); 1083 } 1084 } else { 1085 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1086 } 1087 1088 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1089 PetscFunctionReturn(0); 1090 } 1091 1092 #undef __FUNCT__ 1093 #define __FUNCT__ "MatPermute_MPIAIJ" 1094 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1095 { 1096 MPI_Comm comm,pcomm; 1097 PetscInt first,local_size,nrows,*rows; 1098 int ntids; 1099 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1100 PetscErrorCode ierr; 1101 1102 PetscFunctionBegin; 1103 ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr); 1104 /* make a collective version of 'rowp' */ 1105 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr); 1106 if (pcomm==comm) { 1107 crowp = rowp; 1108 } else { 1109 ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr); 1110 ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr); 1111 ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr); 1112 ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr); 1113 } 1114 /* collect the global row permutation and invert it */ 1115 ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr); 1116 ierr = ISSetPermutation(growp); CHKERRQ(ierr); 1117 if (pcomm!=comm) { 1118 ierr = ISDestroy(crowp); CHKERRQ(ierr); 1119 } 1120 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1121 /* get the local target indices */ 1122 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr); 1123 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr); 1124 ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr); 1125 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr); 1126 ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr); 1127 ierr = ISDestroy(irowp); CHKERRQ(ierr); 1128 /* the column permutation is so much easier; 1129 make a local version of 'colp' and invert it */ 1130 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr); 1131 ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr); 1132 if (ntids==1) { 1133 lcolp = colp; 1134 } else { 1135 ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr); 1136 ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr); 1137 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr); 1138 } 1139 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr); 1140 ierr = ISSetPermutation(lcolp); CHKERRQ(ierr); 1141 if (ntids>1) { 1142 ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr); 1143 ierr = ISDestroy(lcolp); CHKERRQ(ierr); 1144 } 1145 /* now we just get the submatrix */ 1146 ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr); 1147 /* clean up */ 1148 ierr = ISDestroy(lrowp); CHKERRQ(ierr); 1149 ierr = ISDestroy(icolp); CHKERRQ(ierr); 1150 PetscFunctionReturn(0); 1151 } 1152 1153 #undef __FUNCT__ 1154 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1155 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1156 { 1157 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1158 Mat A = mat->A,B = mat->B; 1159 PetscErrorCode ierr; 1160 PetscReal isend[5],irecv[5]; 1161 1162 PetscFunctionBegin; 1163 info->block_size = 1.0; 1164 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1165 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1166 isend[3] = info->memory; isend[4] = info->mallocs; 1167 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1168 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1169 isend[3] += info->memory; isend[4] += info->mallocs; 1170 if (flag == MAT_LOCAL) { 1171 info->nz_used = isend[0]; 1172 info->nz_allocated = isend[1]; 1173 info->nz_unneeded = isend[2]; 1174 info->memory = isend[3]; 1175 info->mallocs = isend[4]; 1176 } else if (flag == MAT_GLOBAL_MAX) { 1177 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1178 info->nz_used = irecv[0]; 1179 info->nz_allocated = irecv[1]; 1180 info->nz_unneeded = irecv[2]; 1181 info->memory = irecv[3]; 1182 info->mallocs = irecv[4]; 1183 } else if (flag == MAT_GLOBAL_SUM) { 1184 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1185 info->nz_used = irecv[0]; 1186 info->nz_allocated = irecv[1]; 1187 info->nz_unneeded = irecv[2]; 1188 info->memory = irecv[3]; 1189 info->mallocs = irecv[4]; 1190 } 1191 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1192 info->fill_ratio_needed = 0; 1193 info->factor_mallocs = 0; 1194 info->rows_global = (double)matin->M; 1195 info->columns_global = (double)matin->N; 1196 info->rows_local = (double)matin->m; 1197 info->columns_local = (double)matin->N; 1198 1199 PetscFunctionReturn(0); 1200 } 1201 1202 #undef __FUNCT__ 1203 #define __FUNCT__ "MatSetOption_MPIAIJ" 1204 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op) 1205 { 1206 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1207 PetscErrorCode ierr; 1208 1209 PetscFunctionBegin; 1210 switch (op) { 1211 case MAT_NO_NEW_NONZERO_LOCATIONS: 1212 case MAT_YES_NEW_NONZERO_LOCATIONS: 1213 case MAT_COLUMNS_UNSORTED: 1214 case MAT_COLUMNS_SORTED: 1215 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1216 case MAT_KEEP_ZEROED_ROWS: 1217 case MAT_NEW_NONZERO_LOCATION_ERR: 1218 case MAT_USE_INODES: 1219 case MAT_DO_NOT_USE_INODES: 1220 case MAT_IGNORE_ZERO_ENTRIES: 1221 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1222 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1223 break; 1224 case MAT_ROW_ORIENTED: 1225 a->roworiented = PETSC_TRUE; 1226 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1227 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1228 break; 1229 case MAT_ROWS_SORTED: 1230 case MAT_ROWS_UNSORTED: 1231 case MAT_YES_NEW_DIAGONALS: 1232 ierr = PetscInfo(A,"Option ignored\n");CHKERRQ(ierr); 1233 break; 1234 case MAT_COLUMN_ORIENTED: 1235 a->roworiented = PETSC_FALSE; 1236 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1237 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1238 break; 1239 case MAT_IGNORE_OFF_PROC_ENTRIES: 1240 a->donotstash = PETSC_TRUE; 1241 break; 1242 case MAT_NO_NEW_DIAGONALS: 1243 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1244 case MAT_SYMMETRIC: 1245 case MAT_STRUCTURALLY_SYMMETRIC: 1246 case MAT_HERMITIAN: 1247 case MAT_SYMMETRY_ETERNAL: 1248 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1249 break; 1250 case MAT_NOT_SYMMETRIC: 1251 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1252 case MAT_NOT_HERMITIAN: 1253 case MAT_NOT_SYMMETRY_ETERNAL: 1254 break; 1255 default: 1256 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1257 } 1258 PetscFunctionReturn(0); 1259 } 1260 1261 #undef __FUNCT__ 1262 #define __FUNCT__ "MatGetRow_MPIAIJ" 1263 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1264 { 1265 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1266 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1267 PetscErrorCode ierr; 1268 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1269 PetscInt nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1270 PetscInt *cmap,*idx_p; 1271 1272 PetscFunctionBegin; 1273 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1274 mat->getrowactive = PETSC_TRUE; 1275 1276 if (!mat->rowvalues && (idx || v)) { 1277 /* 1278 allocate enough space to hold information from the longest row. 1279 */ 1280 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1281 PetscInt max = 1,tmp; 1282 for (i=0; i<matin->m; i++) { 1283 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1284 if (max < tmp) { max = tmp; } 1285 } 1286 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1287 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1288 } 1289 1290 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1291 lrow = row - rstart; 1292 1293 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1294 if (!v) {pvA = 0; pvB = 0;} 1295 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1296 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1297 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1298 nztot = nzA + nzB; 1299 1300 cmap = mat->garray; 1301 if (v || idx) { 1302 if (nztot) { 1303 /* Sort by increasing column numbers, assuming A and B already sorted */ 1304 PetscInt imark = -1; 1305 if (v) { 1306 *v = v_p = mat->rowvalues; 1307 for (i=0; i<nzB; i++) { 1308 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1309 else break; 1310 } 1311 imark = i; 1312 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1313 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1314 } 1315 if (idx) { 1316 *idx = idx_p = mat->rowindices; 1317 if (imark > -1) { 1318 for (i=0; i<imark; i++) { 1319 idx_p[i] = cmap[cworkB[i]]; 1320 } 1321 } else { 1322 for (i=0; i<nzB; i++) { 1323 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1324 else break; 1325 } 1326 imark = i; 1327 } 1328 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1329 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1330 } 1331 } else { 1332 if (idx) *idx = 0; 1333 if (v) *v = 0; 1334 } 1335 } 1336 *nz = nztot; 1337 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1338 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1339 PetscFunctionReturn(0); 1340 } 1341 1342 #undef __FUNCT__ 1343 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1344 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1345 { 1346 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1347 1348 PetscFunctionBegin; 1349 if (!aij->getrowactive) { 1350 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1351 } 1352 aij->getrowactive = PETSC_FALSE; 1353 PetscFunctionReturn(0); 1354 } 1355 1356 #undef __FUNCT__ 1357 #define __FUNCT__ "MatNorm_MPIAIJ" 1358 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1359 { 1360 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1361 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1362 PetscErrorCode ierr; 1363 PetscInt i,j,cstart = aij->cstart; 1364 PetscReal sum = 0.0; 1365 PetscScalar *v; 1366 1367 PetscFunctionBegin; 1368 if (aij->size == 1) { 1369 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1370 } else { 1371 if (type == NORM_FROBENIUS) { 1372 v = amat->a; 1373 for (i=0; i<amat->nz; i++) { 1374 #if defined(PETSC_USE_COMPLEX) 1375 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1376 #else 1377 sum += (*v)*(*v); v++; 1378 #endif 1379 } 1380 v = bmat->a; 1381 for (i=0; i<bmat->nz; i++) { 1382 #if defined(PETSC_USE_COMPLEX) 1383 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1384 #else 1385 sum += (*v)*(*v); v++; 1386 #endif 1387 } 1388 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1389 *norm = sqrt(*norm); 1390 } else if (type == NORM_1) { /* max column norm */ 1391 PetscReal *tmp,*tmp2; 1392 PetscInt *jj,*garray = aij->garray; 1393 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1394 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1395 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1396 *norm = 0.0; 1397 v = amat->a; jj = amat->j; 1398 for (j=0; j<amat->nz; j++) { 1399 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1400 } 1401 v = bmat->a; jj = bmat->j; 1402 for (j=0; j<bmat->nz; j++) { 1403 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1404 } 1405 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1406 for (j=0; j<mat->N; j++) { 1407 if (tmp2[j] > *norm) *norm = tmp2[j]; 1408 } 1409 ierr = PetscFree(tmp);CHKERRQ(ierr); 1410 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1411 } else if (type == NORM_INFINITY) { /* max row norm */ 1412 PetscReal ntemp = 0.0; 1413 for (j=0; j<aij->A->m; j++) { 1414 v = amat->a + amat->i[j]; 1415 sum = 0.0; 1416 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1417 sum += PetscAbsScalar(*v); v++; 1418 } 1419 v = bmat->a + bmat->i[j]; 1420 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1421 sum += PetscAbsScalar(*v); v++; 1422 } 1423 if (sum > ntemp) ntemp = sum; 1424 } 1425 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1426 } else { 1427 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1428 } 1429 } 1430 PetscFunctionReturn(0); 1431 } 1432 1433 #undef __FUNCT__ 1434 #define __FUNCT__ "MatTranspose_MPIAIJ" 1435 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1436 { 1437 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1438 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1439 PetscErrorCode ierr; 1440 PetscInt M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1441 Mat B; 1442 PetscScalar *array; 1443 1444 PetscFunctionBegin; 1445 if (!matout && M != N) { 1446 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1447 } 1448 1449 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 1450 ierr = MatSetSizes(B,A->n,A->m,N,M);CHKERRQ(ierr); 1451 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1452 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1453 1454 /* copy over the A part */ 1455 Aloc = (Mat_SeqAIJ*)a->A->data; 1456 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1457 row = a->rstart; 1458 for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;} 1459 for (i=0; i<m; i++) { 1460 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1461 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1462 } 1463 aj = Aloc->j; 1464 for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;} 1465 1466 /* copy over the B part */ 1467 Aloc = (Mat_SeqAIJ*)a->B->data; 1468 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1469 row = a->rstart; 1470 ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1471 ct = cols; 1472 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1473 for (i=0; i<m; i++) { 1474 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1475 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1476 } 1477 ierr = PetscFree(ct);CHKERRQ(ierr); 1478 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1479 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1480 if (matout) { 1481 *matout = B; 1482 } else { 1483 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1484 } 1485 PetscFunctionReturn(0); 1486 } 1487 1488 #undef __FUNCT__ 1489 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1490 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1491 { 1492 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1493 Mat a = aij->A,b = aij->B; 1494 PetscErrorCode ierr; 1495 PetscInt s1,s2,s3; 1496 1497 PetscFunctionBegin; 1498 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1499 if (rr) { 1500 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1501 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1502 /* Overlap communication with computation. */ 1503 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1504 } 1505 if (ll) { 1506 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1507 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1508 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1509 } 1510 /* scale the diagonal block */ 1511 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1512 1513 if (rr) { 1514 /* Do a scatter end and then right scale the off-diagonal block */ 1515 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1516 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1517 } 1518 1519 PetscFunctionReturn(0); 1520 } 1521 1522 1523 #undef __FUNCT__ 1524 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1525 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A) 1526 { 1527 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1528 PetscErrorCode ierr; 1529 1530 PetscFunctionBegin; 1531 if (!a->rank) { 1532 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1533 } 1534 PetscFunctionReturn(0); 1535 } 1536 1537 #undef __FUNCT__ 1538 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1539 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1540 { 1541 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1542 PetscErrorCode ierr; 1543 1544 PetscFunctionBegin; 1545 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1546 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1547 PetscFunctionReturn(0); 1548 } 1549 #undef __FUNCT__ 1550 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1551 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1552 { 1553 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1554 PetscErrorCode ierr; 1555 1556 PetscFunctionBegin; 1557 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1558 PetscFunctionReturn(0); 1559 } 1560 1561 #undef __FUNCT__ 1562 #define __FUNCT__ "MatEqual_MPIAIJ" 1563 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1564 { 1565 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1566 Mat a,b,c,d; 1567 PetscTruth flg; 1568 PetscErrorCode ierr; 1569 1570 PetscFunctionBegin; 1571 a = matA->A; b = matA->B; 1572 c = matB->A; d = matB->B; 1573 1574 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1575 if (flg) { 1576 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1577 } 1578 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1579 PetscFunctionReturn(0); 1580 } 1581 1582 #undef __FUNCT__ 1583 #define __FUNCT__ "MatCopy_MPIAIJ" 1584 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1585 { 1586 PetscErrorCode ierr; 1587 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1588 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1589 1590 PetscFunctionBegin; 1591 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1592 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1593 /* because of the column compression in the off-processor part of the matrix a->B, 1594 the number of columns in a->B and b->B may be different, hence we cannot call 1595 the MatCopy() directly on the two parts. If need be, we can provide a more 1596 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1597 then copying the submatrices */ 1598 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1599 } else { 1600 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1601 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1602 } 1603 PetscFunctionReturn(0); 1604 } 1605 1606 #undef __FUNCT__ 1607 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1608 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1609 { 1610 PetscErrorCode ierr; 1611 1612 PetscFunctionBegin; 1613 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1614 PetscFunctionReturn(0); 1615 } 1616 1617 #include "petscblaslapack.h" 1618 #undef __FUNCT__ 1619 #define __FUNCT__ "MatAXPY_MPIAIJ" 1620 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1621 { 1622 PetscErrorCode ierr; 1623 PetscInt i; 1624 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1625 PetscBLASInt bnz,one=1; 1626 Mat_SeqAIJ *x,*y; 1627 1628 PetscFunctionBegin; 1629 if (str == SAME_NONZERO_PATTERN) { 1630 PetscScalar alpha = a; 1631 x = (Mat_SeqAIJ *)xx->A->data; 1632 y = (Mat_SeqAIJ *)yy->A->data; 1633 bnz = (PetscBLASInt)x->nz; 1634 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1635 x = (Mat_SeqAIJ *)xx->B->data; 1636 y = (Mat_SeqAIJ *)yy->B->data; 1637 bnz = (PetscBLASInt)x->nz; 1638 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1639 } else if (str == SUBSET_NONZERO_PATTERN) { 1640 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1641 1642 x = (Mat_SeqAIJ *)xx->B->data; 1643 y = (Mat_SeqAIJ *)yy->B->data; 1644 if (y->xtoy && y->XtoY != xx->B) { 1645 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1646 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1647 } 1648 if (!y->xtoy) { /* get xtoy */ 1649 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1650 y->XtoY = xx->B; 1651 } 1652 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1653 } else { 1654 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1655 } 1656 PetscFunctionReturn(0); 1657 } 1658 1659 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1660 1661 #undef __FUNCT__ 1662 #define __FUNCT__ "MatConjugate_MPIAIJ" 1663 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1664 { 1665 #if defined(PETSC_USE_COMPLEX) 1666 PetscErrorCode ierr; 1667 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1668 1669 PetscFunctionBegin; 1670 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1671 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1672 #else 1673 PetscFunctionBegin; 1674 #endif 1675 PetscFunctionReturn(0); 1676 } 1677 1678 #undef __FUNCT__ 1679 #define __FUNCT__ "MatRealPart_MPIAIJ" 1680 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1681 { 1682 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1683 PetscErrorCode ierr; 1684 1685 PetscFunctionBegin; 1686 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1687 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1688 PetscFunctionReturn(0); 1689 } 1690 1691 #undef __FUNCT__ 1692 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1693 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1694 { 1695 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1696 PetscErrorCode ierr; 1697 1698 PetscFunctionBegin; 1699 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1700 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1701 PetscFunctionReturn(0); 1702 } 1703 1704 /* -------------------------------------------------------------------*/ 1705 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1706 MatGetRow_MPIAIJ, 1707 MatRestoreRow_MPIAIJ, 1708 MatMult_MPIAIJ, 1709 /* 4*/ MatMultAdd_MPIAIJ, 1710 MatMultTranspose_MPIAIJ, 1711 MatMultTransposeAdd_MPIAIJ, 1712 0, 1713 0, 1714 0, 1715 /*10*/ 0, 1716 0, 1717 0, 1718 MatRelax_MPIAIJ, 1719 MatTranspose_MPIAIJ, 1720 /*15*/ MatGetInfo_MPIAIJ, 1721 MatEqual_MPIAIJ, 1722 MatGetDiagonal_MPIAIJ, 1723 MatDiagonalScale_MPIAIJ, 1724 MatNorm_MPIAIJ, 1725 /*20*/ MatAssemblyBegin_MPIAIJ, 1726 MatAssemblyEnd_MPIAIJ, 1727 0, 1728 MatSetOption_MPIAIJ, 1729 MatZeroEntries_MPIAIJ, 1730 /*25*/ MatZeroRows_MPIAIJ, 1731 0, 1732 0, 1733 0, 1734 0, 1735 /*30*/ MatSetUpPreallocation_MPIAIJ, 1736 0, 1737 0, 1738 0, 1739 0, 1740 /*35*/ MatDuplicate_MPIAIJ, 1741 0, 1742 0, 1743 0, 1744 0, 1745 /*40*/ MatAXPY_MPIAIJ, 1746 MatGetSubMatrices_MPIAIJ, 1747 MatIncreaseOverlap_MPIAIJ, 1748 MatGetValues_MPIAIJ, 1749 MatCopy_MPIAIJ, 1750 /*45*/ MatPrintHelp_MPIAIJ, 1751 MatScale_MPIAIJ, 1752 0, 1753 0, 1754 0, 1755 /*50*/ MatSetBlockSize_MPIAIJ, 1756 0, 1757 0, 1758 0, 1759 0, 1760 /*55*/ MatFDColoringCreate_MPIAIJ, 1761 0, 1762 MatSetUnfactored_MPIAIJ, 1763 MatPermute_MPIAIJ, 1764 0, 1765 /*60*/ MatGetSubMatrix_MPIAIJ, 1766 MatDestroy_MPIAIJ, 1767 MatView_MPIAIJ, 1768 MatGetPetscMaps_Petsc, 1769 0, 1770 /*65*/ 0, 1771 0, 1772 0, 1773 0, 1774 0, 1775 /*70*/ 0, 1776 0, 1777 MatSetColoring_MPIAIJ, 1778 #if defined(PETSC_HAVE_ADIC) 1779 MatSetValuesAdic_MPIAIJ, 1780 #else 1781 0, 1782 #endif 1783 MatSetValuesAdifor_MPIAIJ, 1784 /*75*/ 0, 1785 0, 1786 0, 1787 0, 1788 0, 1789 /*80*/ 0, 1790 0, 1791 0, 1792 0, 1793 /*84*/ MatLoad_MPIAIJ, 1794 0, 1795 0, 1796 0, 1797 0, 1798 0, 1799 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1800 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1801 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1802 MatPtAP_Basic, 1803 MatPtAPSymbolic_MPIAIJ, 1804 /*95*/ MatPtAPNumeric_MPIAIJ, 1805 0, 1806 0, 1807 0, 1808 0, 1809 /*100*/0, 1810 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1811 MatPtAPNumeric_MPIAIJ_MPIAIJ, 1812 MatConjugate_MPIAIJ, 1813 0, 1814 /*105*/MatSetValuesRow_MPIAIJ, 1815 MatRealPart_MPIAIJ, 1816 MatImaginaryPart_MPIAIJ}; 1817 1818 /* ----------------------------------------------------------------------------------------*/ 1819 1820 EXTERN_C_BEGIN 1821 #undef __FUNCT__ 1822 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1823 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 1824 { 1825 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1826 PetscErrorCode ierr; 1827 1828 PetscFunctionBegin; 1829 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1830 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1831 PetscFunctionReturn(0); 1832 } 1833 EXTERN_C_END 1834 1835 EXTERN_C_BEGIN 1836 #undef __FUNCT__ 1837 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1838 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 1839 { 1840 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1841 PetscErrorCode ierr; 1842 1843 PetscFunctionBegin; 1844 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1845 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1846 PetscFunctionReturn(0); 1847 } 1848 EXTERN_C_END 1849 1850 #include "petscpc.h" 1851 EXTERN_C_BEGIN 1852 #undef __FUNCT__ 1853 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1854 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1855 { 1856 Mat_MPIAIJ *b; 1857 PetscErrorCode ierr; 1858 PetscInt i; 1859 1860 PetscFunctionBegin; 1861 B->preallocated = PETSC_TRUE; 1862 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1863 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1864 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1865 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1866 if (d_nnz) { 1867 for (i=0; i<B->m; i++) { 1868 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1869 } 1870 } 1871 if (o_nnz) { 1872 for (i=0; i<B->m; i++) { 1873 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1874 } 1875 } 1876 b = (Mat_MPIAIJ*)B->data; 1877 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1878 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1879 1880 PetscFunctionReturn(0); 1881 } 1882 EXTERN_C_END 1883 1884 #undef __FUNCT__ 1885 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1886 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1887 { 1888 Mat mat; 1889 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1890 PetscErrorCode ierr; 1891 1892 PetscFunctionBegin; 1893 *newmat = 0; 1894 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 1895 ierr = MatSetSizes(mat,matin->m,matin->n,matin->M,matin->N);CHKERRQ(ierr); 1896 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1897 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1898 a = (Mat_MPIAIJ*)mat->data; 1899 1900 mat->factor = matin->factor; 1901 mat->bs = matin->bs; 1902 mat->assembled = PETSC_TRUE; 1903 mat->insertmode = NOT_SET_VALUES; 1904 mat->preallocated = PETSC_TRUE; 1905 1906 a->rstart = oldmat->rstart; 1907 a->rend = oldmat->rend; 1908 a->cstart = oldmat->cstart; 1909 a->cend = oldmat->cend; 1910 a->size = oldmat->size; 1911 a->rank = oldmat->rank; 1912 a->donotstash = oldmat->donotstash; 1913 a->roworiented = oldmat->roworiented; 1914 a->rowindices = 0; 1915 a->rowvalues = 0; 1916 a->getrowactive = PETSC_FALSE; 1917 1918 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 1919 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1920 if (oldmat->colmap) { 1921 #if defined (PETSC_USE_CTABLE) 1922 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1923 #else 1924 ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1925 ierr = PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1926 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1927 #endif 1928 } else a->colmap = 0; 1929 if (oldmat->garray) { 1930 PetscInt len; 1931 len = oldmat->B->n; 1932 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1933 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 1934 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 1935 } else a->garray = 0; 1936 1937 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1938 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 1939 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1940 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 1941 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1942 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1943 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1944 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1945 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1946 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 1947 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1948 *newmat = mat; 1949 PetscFunctionReturn(0); 1950 } 1951 1952 #include "petscsys.h" 1953 1954 #undef __FUNCT__ 1955 #define __FUNCT__ "MatLoad_MPIAIJ" 1956 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat) 1957 { 1958 Mat A; 1959 PetscScalar *vals,*svals; 1960 MPI_Comm comm = ((PetscObject)viewer)->comm; 1961 MPI_Status status; 1962 PetscErrorCode ierr; 1963 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 1964 PetscInt i,nz,j,rstart,rend,mmax; 1965 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 1966 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1967 PetscInt cend,cstart,n,*rowners; 1968 int fd; 1969 1970 PetscFunctionBegin; 1971 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1972 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1973 if (!rank) { 1974 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1975 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1976 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1977 } 1978 1979 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1980 M = header[1]; N = header[2]; 1981 /* determine ownership of all rows */ 1982 m = M/size + ((M % size) > rank); 1983 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1984 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1985 1986 /* First process needs enough room for process with most rows */ 1987 if (!rank) { 1988 mmax = rowners[1]; 1989 for (i=2; i<size; i++) { 1990 mmax = PetscMax(mmax,rowners[i]); 1991 } 1992 } else mmax = m; 1993 1994 rowners[0] = 0; 1995 for (i=2; i<=size; i++) { 1996 mmax = PetscMax(mmax,rowners[i]); 1997 rowners[i] += rowners[i-1]; 1998 } 1999 rstart = rowners[rank]; 2000 rend = rowners[rank+1]; 2001 2002 /* distribute row lengths to all processors */ 2003 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2004 if (!rank) { 2005 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2006 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2007 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2008 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2009 for (j=0; j<m; j++) { 2010 procsnz[0] += ourlens[j]; 2011 } 2012 for (i=1; i<size; i++) { 2013 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2014 /* calculate the number of nonzeros on each processor */ 2015 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2016 procsnz[i] += rowlengths[j]; 2017 } 2018 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2019 } 2020 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2021 } else { 2022 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2023 } 2024 2025 if (!rank) { 2026 /* determine max buffer needed and allocate it */ 2027 maxnz = 0; 2028 for (i=0; i<size; i++) { 2029 maxnz = PetscMax(maxnz,procsnz[i]); 2030 } 2031 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2032 2033 /* read in my part of the matrix column indices */ 2034 nz = procsnz[0]; 2035 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2036 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2037 2038 /* read in every one elses and ship off */ 2039 for (i=1; i<size; i++) { 2040 nz = procsnz[i]; 2041 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2042 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2043 } 2044 ierr = PetscFree(cols);CHKERRQ(ierr); 2045 } else { 2046 /* determine buffer space needed for message */ 2047 nz = 0; 2048 for (i=0; i<m; i++) { 2049 nz += ourlens[i]; 2050 } 2051 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2052 2053 /* receive message of column indices*/ 2054 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2055 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2056 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2057 } 2058 2059 /* determine column ownership if matrix is not square */ 2060 if (N != M) { 2061 n = N/size + ((N % size) > rank); 2062 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2063 cstart = cend - n; 2064 } else { 2065 cstart = rstart; 2066 cend = rend; 2067 n = cend - cstart; 2068 } 2069 2070 /* loop over local rows, determining number of off diagonal entries */ 2071 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2072 jj = 0; 2073 for (i=0; i<m; i++) { 2074 for (j=0; j<ourlens[i]; j++) { 2075 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2076 jj++; 2077 } 2078 } 2079 2080 /* create our matrix */ 2081 for (i=0; i<m; i++) { 2082 ourlens[i] -= offlens[i]; 2083 } 2084 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2085 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2086 ierr = MatSetType(A,type);CHKERRQ(ierr); 2087 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2088 2089 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2090 for (i=0; i<m; i++) { 2091 ourlens[i] += offlens[i]; 2092 } 2093 2094 if (!rank) { 2095 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2096 2097 /* read in my part of the matrix numerical values */ 2098 nz = procsnz[0]; 2099 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2100 2101 /* insert into matrix */ 2102 jj = rstart; 2103 smycols = mycols; 2104 svals = vals; 2105 for (i=0; i<m; i++) { 2106 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2107 smycols += ourlens[i]; 2108 svals += ourlens[i]; 2109 jj++; 2110 } 2111 2112 /* read in other processors and ship out */ 2113 for (i=1; i<size; i++) { 2114 nz = procsnz[i]; 2115 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2116 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2117 } 2118 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2119 } else { 2120 /* receive numeric values */ 2121 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2122 2123 /* receive message of values*/ 2124 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2125 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2126 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2127 2128 /* insert into matrix */ 2129 jj = rstart; 2130 smycols = mycols; 2131 svals = vals; 2132 for (i=0; i<m; i++) { 2133 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2134 smycols += ourlens[i]; 2135 svals += ourlens[i]; 2136 jj++; 2137 } 2138 } 2139 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2140 ierr = PetscFree(vals);CHKERRQ(ierr); 2141 ierr = PetscFree(mycols);CHKERRQ(ierr); 2142 ierr = PetscFree(rowners);CHKERRQ(ierr); 2143 2144 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2145 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2146 *newmat = A; 2147 PetscFunctionReturn(0); 2148 } 2149 2150 #undef __FUNCT__ 2151 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2152 /* 2153 Not great since it makes two copies of the submatrix, first an SeqAIJ 2154 in local and then by concatenating the local matrices the end result. 2155 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2156 */ 2157 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2158 { 2159 PetscErrorCode ierr; 2160 PetscMPIInt rank,size; 2161 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2162 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2163 Mat *local,M,Mreuse; 2164 PetscScalar *vwork,*aa; 2165 MPI_Comm comm = mat->comm; 2166 Mat_SeqAIJ *aij; 2167 2168 2169 PetscFunctionBegin; 2170 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2171 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2172 2173 if (call == MAT_REUSE_MATRIX) { 2174 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2175 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2176 local = &Mreuse; 2177 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2178 } else { 2179 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2180 Mreuse = *local; 2181 ierr = PetscFree(local);CHKERRQ(ierr); 2182 } 2183 2184 /* 2185 m - number of local rows 2186 n - number of columns (same on all processors) 2187 rstart - first row in new global matrix generated 2188 */ 2189 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2190 if (call == MAT_INITIAL_MATRIX) { 2191 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2192 ii = aij->i; 2193 jj = aij->j; 2194 2195 /* 2196 Determine the number of non-zeros in the diagonal and off-diagonal 2197 portions of the matrix in order to do correct preallocation 2198 */ 2199 2200 /* first get start and end of "diagonal" columns */ 2201 if (csize == PETSC_DECIDE) { 2202 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2203 if (mglobal == n) { /* square matrix */ 2204 nlocal = m; 2205 } else { 2206 nlocal = n/size + ((n % size) > rank); 2207 } 2208 } else { 2209 nlocal = csize; 2210 } 2211 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2212 rstart = rend - nlocal; 2213 if (rank == size - 1 && rend != n) { 2214 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2215 } 2216 2217 /* next, compute all the lengths */ 2218 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2219 olens = dlens + m; 2220 for (i=0; i<m; i++) { 2221 jend = ii[i+1] - ii[i]; 2222 olen = 0; 2223 dlen = 0; 2224 for (j=0; j<jend; j++) { 2225 if (*jj < rstart || *jj >= rend) olen++; 2226 else dlen++; 2227 jj++; 2228 } 2229 olens[i] = olen; 2230 dlens[i] = dlen; 2231 } 2232 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2233 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 2234 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2235 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2236 ierr = PetscFree(dlens);CHKERRQ(ierr); 2237 } else { 2238 PetscInt ml,nl; 2239 2240 M = *newmat; 2241 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2242 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2243 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2244 /* 2245 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2246 rather than the slower MatSetValues(). 2247 */ 2248 M->was_assembled = PETSC_TRUE; 2249 M->assembled = PETSC_FALSE; 2250 } 2251 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2252 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2253 ii = aij->i; 2254 jj = aij->j; 2255 aa = aij->a; 2256 for (i=0; i<m; i++) { 2257 row = rstart + i; 2258 nz = ii[i+1] - ii[i]; 2259 cwork = jj; jj += nz; 2260 vwork = aa; aa += nz; 2261 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2262 } 2263 2264 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2265 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2266 *newmat = M; 2267 2268 /* save submatrix used in processor for next request */ 2269 if (call == MAT_INITIAL_MATRIX) { 2270 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2271 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2272 } 2273 2274 PetscFunctionReturn(0); 2275 } 2276 2277 EXTERN_C_BEGIN 2278 #undef __FUNCT__ 2279 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2280 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2281 { 2282 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2283 PetscInt m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d; 2284 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii; 2285 const PetscInt *JJ; 2286 PetscScalar *values; 2287 PetscErrorCode ierr; 2288 2289 PetscFunctionBegin; 2290 if (I[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"I[0] must be 0 it is %D",I[0]); 2291 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2292 o_nnz = d_nnz + m; 2293 2294 for (i=0; i<m; i++) { 2295 nnz = I[i+1]- I[i]; 2296 JJ = J + I[i]; 2297 nnz_max = PetscMax(nnz_max,nnz); 2298 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 2299 for (j=0; j<nnz; j++) { 2300 if (*JJ >= cstart) break; 2301 JJ++; 2302 } 2303 d = 0; 2304 for (; j<nnz; j++) { 2305 if (*JJ++ >= cend) break; 2306 d++; 2307 } 2308 d_nnz[i] = d; 2309 o_nnz[i] = nnz - d; 2310 } 2311 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2312 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2313 2314 if (v) values = (PetscScalar*)v; 2315 else { 2316 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2317 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2318 } 2319 2320 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2321 for (i=0; i<m; i++) { 2322 ii = i + rstart; 2323 nnz = I[i+1]- I[i]; 2324 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values+(v ? I[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2325 } 2326 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2327 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2328 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2329 2330 if (!v) { 2331 ierr = PetscFree(values);CHKERRQ(ierr); 2332 } 2333 PetscFunctionReturn(0); 2334 } 2335 EXTERN_C_END 2336 2337 #undef __FUNCT__ 2338 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2339 /*@ 2340 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2341 (the default parallel PETSc format). 2342 2343 Collective on MPI_Comm 2344 2345 Input Parameters: 2346 + B - the matrix 2347 . i - the indices into j for the start of each local row (starts with zero) 2348 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2349 - v - optional values in the matrix 2350 2351 Level: developer 2352 2353 .keywords: matrix, aij, compressed row, sparse, parallel 2354 2355 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2356 @*/ 2357 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2358 { 2359 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2360 2361 PetscFunctionBegin; 2362 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2363 if (f) { 2364 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2365 } 2366 PetscFunctionReturn(0); 2367 } 2368 2369 #undef __FUNCT__ 2370 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2371 /*@C 2372 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2373 (the default parallel PETSc format). For good matrix assembly performance 2374 the user should preallocate the matrix storage by setting the parameters 2375 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2376 performance can be increased by more than a factor of 50. 2377 2378 Collective on MPI_Comm 2379 2380 Input Parameters: 2381 + A - the matrix 2382 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2383 (same value is used for all local rows) 2384 . d_nnz - array containing the number of nonzeros in the various rows of the 2385 DIAGONAL portion of the local submatrix (possibly different for each row) 2386 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2387 The size of this array is equal to the number of local rows, i.e 'm'. 2388 You must leave room for the diagonal entry even if it is zero. 2389 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2390 submatrix (same value is used for all local rows). 2391 - o_nnz - array containing the number of nonzeros in the various rows of the 2392 OFF-DIAGONAL portion of the local submatrix (possibly different for 2393 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2394 structure. The size of this array is equal to the number 2395 of local rows, i.e 'm'. 2396 2397 If the *_nnz parameter is given then the *_nz parameter is ignored 2398 2399 The AIJ format (also called the Yale sparse matrix format or 2400 compressed row storage (CSR)), is fully compatible with standard Fortran 77 2401 storage. The stored row and column indices begin with zero. See the users manual for details. 2402 2403 The parallel matrix is partitioned such that the first m0 rows belong to 2404 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2405 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2406 2407 The DIAGONAL portion of the local submatrix of a processor can be defined 2408 as the submatrix which is obtained by extraction the part corresponding 2409 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2410 first row that belongs to the processor, and r2 is the last row belonging 2411 to the this processor. This is a square mxm matrix. The remaining portion 2412 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2413 2414 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2415 2416 Example usage: 2417 2418 Consider the following 8x8 matrix with 34 non-zero values, that is 2419 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2420 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2421 as follows: 2422 2423 .vb 2424 1 2 0 | 0 3 0 | 0 4 2425 Proc0 0 5 6 | 7 0 0 | 8 0 2426 9 0 10 | 11 0 0 | 12 0 2427 ------------------------------------- 2428 13 0 14 | 15 16 17 | 0 0 2429 Proc1 0 18 0 | 19 20 21 | 0 0 2430 0 0 0 | 22 23 0 | 24 0 2431 ------------------------------------- 2432 Proc2 25 26 27 | 0 0 28 | 29 0 2433 30 0 0 | 31 32 33 | 0 34 2434 .ve 2435 2436 This can be represented as a collection of submatrices as: 2437 2438 .vb 2439 A B C 2440 D E F 2441 G H I 2442 .ve 2443 2444 Where the submatrices A,B,C are owned by proc0, D,E,F are 2445 owned by proc1, G,H,I are owned by proc2. 2446 2447 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2448 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2449 The 'M','N' parameters are 8,8, and have the same values on all procs. 2450 2451 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2452 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2453 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2454 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2455 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2456 matrix, ans [DF] as another SeqAIJ matrix. 2457 2458 When d_nz, o_nz parameters are specified, d_nz storage elements are 2459 allocated for every row of the local diagonal submatrix, and o_nz 2460 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2461 One way to choose d_nz and o_nz is to use the max nonzerors per local 2462 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2463 In this case, the values of d_nz,o_nz are: 2464 .vb 2465 proc0 : dnz = 2, o_nz = 2 2466 proc1 : dnz = 3, o_nz = 2 2467 proc2 : dnz = 1, o_nz = 4 2468 .ve 2469 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2470 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2471 for proc3. i.e we are using 12+15+10=37 storage locations to store 2472 34 values. 2473 2474 When d_nnz, o_nnz parameters are specified, the storage is specified 2475 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2476 In the above case the values for d_nnz,o_nnz are: 2477 .vb 2478 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2479 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2480 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2481 .ve 2482 Here the space allocated is sum of all the above values i.e 34, and 2483 hence pre-allocation is perfect. 2484 2485 Level: intermediate 2486 2487 .keywords: matrix, aij, compressed row, sparse, parallel 2488 2489 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 2490 MPIAIJ 2491 @*/ 2492 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2493 { 2494 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2495 2496 PetscFunctionBegin; 2497 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2498 if (f) { 2499 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2500 } 2501 PetscFunctionReturn(0); 2502 } 2503 2504 #undef __FUNCT__ 2505 #define __FUNCT__ "MatCreateMPIAIJ" 2506 /*@C 2507 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2508 (the default parallel PETSc format). For good matrix assembly performance 2509 the user should preallocate the matrix storage by setting the parameters 2510 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2511 performance can be increased by more than a factor of 50. 2512 2513 Collective on MPI_Comm 2514 2515 Input Parameters: 2516 + comm - MPI communicator 2517 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2518 This value should be the same as the local size used in creating the 2519 y vector for the matrix-vector product y = Ax. 2520 . n - This value should be the same as the local size used in creating the 2521 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2522 calculated if N is given) For square matrices n is almost always m. 2523 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2524 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2525 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2526 (same value is used for all local rows) 2527 . d_nnz - array containing the number of nonzeros in the various rows of the 2528 DIAGONAL portion of the local submatrix (possibly different for each row) 2529 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2530 The size of this array is equal to the number of local rows, i.e 'm'. 2531 You must leave room for the diagonal entry even if it is zero. 2532 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2533 submatrix (same value is used for all local rows). 2534 - o_nnz - array containing the number of nonzeros in the various rows of the 2535 OFF-DIAGONAL portion of the local submatrix (possibly different for 2536 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2537 structure. The size of this array is equal to the number 2538 of local rows, i.e 'm'. 2539 2540 Output Parameter: 2541 . A - the matrix 2542 2543 Notes: 2544 If the *_nnz parameter is given then the *_nz parameter is ignored 2545 2546 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2547 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2548 storage requirements for this matrix. 2549 2550 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2551 processor than it must be used on all processors that share the object for 2552 that argument. 2553 2554 The user MUST specify either the local or global matrix dimensions 2555 (possibly both). 2556 2557 The parallel matrix is partitioned such that the first m0 rows belong to 2558 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2559 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2560 2561 The DIAGONAL portion of the local submatrix of a processor can be defined 2562 as the submatrix which is obtained by extraction the part corresponding 2563 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2564 first row that belongs to the processor, and r2 is the last row belonging 2565 to the this processor. This is a square mxm matrix. The remaining portion 2566 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2567 2568 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2569 2570 When calling this routine with a single process communicator, a matrix of 2571 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2572 type of communicator, use the construction mechanism: 2573 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 2574 2575 By default, this format uses inodes (identical nodes) when possible. 2576 We search for consecutive rows with the same nonzero structure, thereby 2577 reusing matrix information to achieve increased efficiency. 2578 2579 Options Database Keys: 2580 + -mat_no_inode - Do not use inodes 2581 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 2582 - -mat_aij_oneindex - Internally use indexing starting at 1 2583 rather than 0. Note that when calling MatSetValues(), 2584 the user still MUST index entries starting at 0! 2585 2586 2587 Example usage: 2588 2589 Consider the following 8x8 matrix with 34 non-zero values, that is 2590 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2591 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2592 as follows: 2593 2594 .vb 2595 1 2 0 | 0 3 0 | 0 4 2596 Proc0 0 5 6 | 7 0 0 | 8 0 2597 9 0 10 | 11 0 0 | 12 0 2598 ------------------------------------- 2599 13 0 14 | 15 16 17 | 0 0 2600 Proc1 0 18 0 | 19 20 21 | 0 0 2601 0 0 0 | 22 23 0 | 24 0 2602 ------------------------------------- 2603 Proc2 25 26 27 | 0 0 28 | 29 0 2604 30 0 0 | 31 32 33 | 0 34 2605 .ve 2606 2607 This can be represented as a collection of submatrices as: 2608 2609 .vb 2610 A B C 2611 D E F 2612 G H I 2613 .ve 2614 2615 Where the submatrices A,B,C are owned by proc0, D,E,F are 2616 owned by proc1, G,H,I are owned by proc2. 2617 2618 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2619 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2620 The 'M','N' parameters are 8,8, and have the same values on all procs. 2621 2622 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2623 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2624 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2625 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2626 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2627 matrix, ans [DF] as another SeqAIJ matrix. 2628 2629 When d_nz, o_nz parameters are specified, d_nz storage elements are 2630 allocated for every row of the local diagonal submatrix, and o_nz 2631 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2632 One way to choose d_nz and o_nz is to use the max nonzerors per local 2633 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2634 In this case, the values of d_nz,o_nz are: 2635 .vb 2636 proc0 : dnz = 2, o_nz = 2 2637 proc1 : dnz = 3, o_nz = 2 2638 proc2 : dnz = 1, o_nz = 4 2639 .ve 2640 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2641 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2642 for proc3. i.e we are using 12+15+10=37 storage locations to store 2643 34 values. 2644 2645 When d_nnz, o_nnz parameters are specified, the storage is specified 2646 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2647 In the above case the values for d_nnz,o_nnz are: 2648 .vb 2649 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2650 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2651 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2652 .ve 2653 Here the space allocated is sum of all the above values i.e 34, and 2654 hence pre-allocation is perfect. 2655 2656 Level: intermediate 2657 2658 .keywords: matrix, aij, compressed row, sparse, parallel 2659 2660 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2661 MPIAIJ 2662 @*/ 2663 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2664 { 2665 PetscErrorCode ierr; 2666 PetscMPIInt size; 2667 2668 PetscFunctionBegin; 2669 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2670 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2671 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2672 if (size > 1) { 2673 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2674 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2675 } else { 2676 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2677 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2678 } 2679 PetscFunctionReturn(0); 2680 } 2681 2682 #undef __FUNCT__ 2683 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2684 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2685 { 2686 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2687 2688 PetscFunctionBegin; 2689 *Ad = a->A; 2690 *Ao = a->B; 2691 *colmap = a->garray; 2692 PetscFunctionReturn(0); 2693 } 2694 2695 #undef __FUNCT__ 2696 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2697 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2698 { 2699 PetscErrorCode ierr; 2700 PetscInt i; 2701 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2702 2703 PetscFunctionBegin; 2704 if (coloring->ctype == IS_COLORING_LOCAL) { 2705 ISColoringValue *allcolors,*colors; 2706 ISColoring ocoloring; 2707 2708 /* set coloring for diagonal portion */ 2709 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2710 2711 /* set coloring for off-diagonal portion */ 2712 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2713 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2714 for (i=0; i<a->B->n; i++) { 2715 colors[i] = allcolors[a->garray[i]]; 2716 } 2717 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2718 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,coloring->n,colors,&ocoloring);CHKERRQ(ierr); 2719 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2720 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2721 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2722 ISColoringValue *colors; 2723 PetscInt *larray; 2724 ISColoring ocoloring; 2725 2726 /* set coloring for diagonal portion */ 2727 ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2728 for (i=0; i<a->A->n; i++) { 2729 larray[i] = i + a->cstart; 2730 } 2731 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2732 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2733 for (i=0; i<a->A->n; i++) { 2734 colors[i] = coloring->colors[larray[i]]; 2735 } 2736 ierr = PetscFree(larray);CHKERRQ(ierr); 2737 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,coloring->n,colors,&ocoloring);CHKERRQ(ierr); 2738 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2739 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2740 2741 /* set coloring for off-diagonal portion */ 2742 ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2743 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2744 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2745 for (i=0; i<a->B->n; i++) { 2746 colors[i] = coloring->colors[larray[i]]; 2747 } 2748 ierr = PetscFree(larray);CHKERRQ(ierr); 2749 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,coloring->n,colors,&ocoloring);CHKERRQ(ierr); 2750 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2751 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2752 } else { 2753 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 2754 } 2755 2756 PetscFunctionReturn(0); 2757 } 2758 2759 #if defined(PETSC_HAVE_ADIC) 2760 #undef __FUNCT__ 2761 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2762 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2763 { 2764 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2765 PetscErrorCode ierr; 2766 2767 PetscFunctionBegin; 2768 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2769 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2770 PetscFunctionReturn(0); 2771 } 2772 #endif 2773 2774 #undef __FUNCT__ 2775 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2776 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 2777 { 2778 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2779 PetscErrorCode ierr; 2780 2781 PetscFunctionBegin; 2782 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2783 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2784 PetscFunctionReturn(0); 2785 } 2786 2787 #undef __FUNCT__ 2788 #define __FUNCT__ "MatMerge" 2789 /*@C 2790 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2791 matrices from each processor 2792 2793 Collective on MPI_Comm 2794 2795 Input Parameters: 2796 + comm - the communicators the parallel matrix will live on 2797 . inmat - the input sequential matrices 2798 . n - number of local columns (or PETSC_DECIDE) 2799 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2800 2801 Output Parameter: 2802 . outmat - the parallel matrix generated 2803 2804 Level: advanced 2805 2806 Notes: The number of columns of the matrix in EACH processor MUST be the same. 2807 2808 @*/ 2809 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2810 { 2811 PetscErrorCode ierr; 2812 PetscInt m,N,i,rstart,nnz,I,*dnz,*onz; 2813 PetscInt *indx; 2814 PetscScalar *values; 2815 PetscMap columnmap,rowmap; 2816 2817 PetscFunctionBegin; 2818 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2819 /* 2820 PetscMPIInt rank; 2821 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2822 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N); 2823 */ 2824 if (scall == MAT_INITIAL_MATRIX){ 2825 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2826 if (n == PETSC_DECIDE){ 2827 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2828 ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr); 2829 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2830 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2831 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2832 } 2833 2834 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2835 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2836 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2837 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2838 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2839 2840 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2841 for (i=0;i<m;i++) { 2842 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2843 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2844 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2845 } 2846 /* This routine will ONLY return MPIAIJ type matrix */ 2847 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 2848 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2849 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2850 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2851 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2852 2853 } else if (scall == MAT_REUSE_MATRIX){ 2854 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2855 } else { 2856 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 2857 } 2858 2859 for (i=0;i<m;i++) { 2860 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2861 I = i + rstart; 2862 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2863 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2864 } 2865 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2866 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2867 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2868 2869 PetscFunctionReturn(0); 2870 } 2871 2872 #undef __FUNCT__ 2873 #define __FUNCT__ "MatFileSplit" 2874 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2875 { 2876 PetscErrorCode ierr; 2877 PetscMPIInt rank; 2878 PetscInt m,N,i,rstart,nnz; 2879 size_t len; 2880 const PetscInt *indx; 2881 PetscViewer out; 2882 char *name; 2883 Mat B; 2884 const PetscScalar *values; 2885 2886 PetscFunctionBegin; 2887 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2888 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2889 /* Should this be the type of the diagonal block of A? */ 2890 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2891 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 2892 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2893 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2894 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2895 for (i=0;i<m;i++) { 2896 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2897 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2898 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2899 } 2900 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2901 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2902 2903 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2904 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2905 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2906 sprintf(name,"%s.%d",outfile,rank); 2907 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 2908 ierr = PetscFree(name); 2909 ierr = MatView(B,out);CHKERRQ(ierr); 2910 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2911 ierr = MatDestroy(B);CHKERRQ(ierr); 2912 PetscFunctionReturn(0); 2913 } 2914 2915 #include "src/mat/utils/freespace.h" 2916 #include "petscbt.h" 2917 static PetscEvent logkey_seqstompinum = 0; 2918 #undef __FUNCT__ 2919 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 2920 /*@C 2921 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 2922 matrices from each processor 2923 2924 Collective on MPI_Comm 2925 2926 Input Parameters: 2927 + comm - the communicators the parallel matrix will live on 2928 . seqmat - the input sequential matrices 2929 . m - number of local rows (or PETSC_DECIDE) 2930 . n - number of local columns (or PETSC_DECIDE) 2931 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2932 2933 Output Parameter: 2934 . mpimat - the parallel matrix generated 2935 2936 Level: advanced 2937 2938 Notes: 2939 The dimensions of the sequential matrix in each processor MUST be the same. 2940 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 2941 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 2942 @*/ 2943 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 2944 { 2945 PetscErrorCode ierr; 2946 MPI_Comm comm=mpimat->comm; 2947 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2948 PetscMPIInt size,rank,taga,*len_s; 2949 PetscInt N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j; 2950 PetscInt proc,m; 2951 PetscInt **buf_ri,**buf_rj; 2952 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 2953 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 2954 MPI_Request *s_waits,*r_waits; 2955 MPI_Status *status; 2956 MatScalar *aa=a->a,**abuf_r,*ba_i; 2957 Mat_Merge_SeqsToMPI *merge; 2958 PetscObjectContainer container; 2959 2960 PetscFunctionBegin; 2961 if (!logkey_seqstompinum) { 2962 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 2963 } 2964 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 2965 2966 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2967 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2968 2969 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2970 if (container) { 2971 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2972 } 2973 bi = merge->bi; 2974 bj = merge->bj; 2975 buf_ri = merge->buf_ri; 2976 buf_rj = merge->buf_rj; 2977 2978 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 2979 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 2980 len_s = merge->len_s; 2981 2982 /* send and recv matrix values */ 2983 /*-----------------------------*/ 2984 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 2985 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 2986 2987 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 2988 for (proc=0,k=0; proc<size; proc++){ 2989 if (!len_s[proc]) continue; 2990 i = owners[proc]; 2991 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 2992 k++; 2993 } 2994 2995 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 2996 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 2997 ierr = PetscFree(status);CHKERRQ(ierr); 2998 2999 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3000 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3001 3002 /* insert mat values of mpimat */ 3003 /*----------------------------*/ 3004 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 3005 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3006 nextrow = buf_ri_k + merge->nrecv; 3007 nextai = nextrow + merge->nrecv; 3008 3009 for (k=0; k<merge->nrecv; k++){ 3010 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3011 nrows = *(buf_ri_k[k]); 3012 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 3013 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3014 } 3015 3016 /* set values of ba */ 3017 ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); 3018 for (i=0; i<m; i++) { 3019 arow = owners[rank] + i; 3020 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 3021 bnzi = bi[i+1] - bi[i]; 3022 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 3023 3024 /* add local non-zero vals of this proc's seqmat into ba */ 3025 anzi = ai[arow+1] - ai[arow]; 3026 aj = a->j + ai[arow]; 3027 aa = a->a + ai[arow]; 3028 nextaj = 0; 3029 for (j=0; nextaj<anzi; j++){ 3030 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3031 ba_i[j] += aa[nextaj++]; 3032 } 3033 } 3034 3035 /* add received vals into ba */ 3036 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3037 /* i-th row */ 3038 if (i == *nextrow[k]) { 3039 anzi = *(nextai[k]+1) - *nextai[k]; 3040 aj = buf_rj[k] + *(nextai[k]); 3041 aa = abuf_r[k] + *(nextai[k]); 3042 nextaj = 0; 3043 for (j=0; nextaj<anzi; j++){ 3044 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3045 ba_i[j] += aa[nextaj++]; 3046 } 3047 } 3048 nextrow[k]++; nextai[k]++; 3049 } 3050 } 3051 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3052 } 3053 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3054 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3055 3056 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3057 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3058 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3059 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3060 PetscFunctionReturn(0); 3061 } 3062 3063 static PetscEvent logkey_seqstompisym = 0; 3064 EXTERN PetscErrorCode MatDestroy_MPIAIJ_MatPtAP(Mat); 3065 #undef __FUNCT__ 3066 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 3067 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3068 { 3069 PetscErrorCode ierr; 3070 Mat B_mpi; 3071 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3072 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3073 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3074 PetscInt M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j; 3075 PetscInt len,proc,*dnz,*onz; 3076 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3077 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3078 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3079 MPI_Status *status; 3080 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3081 PetscBT lnkbt; 3082 Mat_Merge_SeqsToMPI *merge; 3083 PetscObjectContainer container; 3084 3085 PetscFunctionBegin; 3086 if (!logkey_seqstompisym) { 3087 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3088 } 3089 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3090 3091 /* make sure it is a PETSc comm */ 3092 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 3093 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3094 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3095 3096 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3097 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3098 3099 /* determine row ownership */ 3100 /*---------------------------------------------------------*/ 3101 ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr); 3102 if (m == PETSC_DECIDE) { 3103 ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr); 3104 } else { 3105 ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 3106 } 3107 ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr); 3108 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3109 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3110 3111 if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); } 3112 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 3113 3114 /* determine the number of messages to send, their lengths */ 3115 /*---------------------------------------------------------*/ 3116 len_s = merge->len_s; 3117 3118 len = 0; /* length of buf_si[] */ 3119 merge->nsend = 0; 3120 for (proc=0; proc<size; proc++){ 3121 len_si[proc] = 0; 3122 if (proc == rank){ 3123 len_s[proc] = 0; 3124 } else { 3125 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3126 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3127 } 3128 if (len_s[proc]) { 3129 merge->nsend++; 3130 nrows = 0; 3131 for (i=owners[proc]; i<owners[proc+1]; i++){ 3132 if (ai[i+1] > ai[i]) nrows++; 3133 } 3134 len_si[proc] = 2*(nrows+1); 3135 len += len_si[proc]; 3136 } 3137 } 3138 3139 /* determine the number and length of messages to receive for ij-structure */ 3140 /*-------------------------------------------------------------------------*/ 3141 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3142 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3143 3144 /* post the Irecv of j-structure */ 3145 /*-------------------------------*/ 3146 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr); 3147 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3148 3149 /* post the Isend of j-structure */ 3150 /*--------------------------------*/ 3151 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3152 sj_waits = si_waits + merge->nsend; 3153 3154 for (proc=0, k=0; proc<size; proc++){ 3155 if (!len_s[proc]) continue; 3156 i = owners[proc]; 3157 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3158 k++; 3159 } 3160 3161 /* receives and sends of j-structure are complete */ 3162 /*------------------------------------------------*/ 3163 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 3164 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 3165 3166 /* send and recv i-structure */ 3167 /*---------------------------*/ 3168 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr); 3169 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3170 3171 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3172 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3173 for (proc=0,k=0; proc<size; proc++){ 3174 if (!len_s[proc]) continue; 3175 /* form outgoing message for i-structure: 3176 buf_si[0]: nrows to be sent 3177 [1:nrows]: row index (global) 3178 [nrows+1:2*nrows+1]: i-structure index 3179 */ 3180 /*-------------------------------------------*/ 3181 nrows = len_si[proc]/2 - 1; 3182 buf_si_i = buf_si + nrows+1; 3183 buf_si[0] = nrows; 3184 buf_si_i[0] = 0; 3185 nrows = 0; 3186 for (i=owners[proc]; i<owners[proc+1]; i++){ 3187 anzi = ai[i+1] - ai[i]; 3188 if (anzi) { 3189 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3190 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3191 nrows++; 3192 } 3193 } 3194 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3195 k++; 3196 buf_si += len_si[proc]; 3197 } 3198 3199 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 3200 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 3201 3202 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 3203 for (i=0; i<merge->nrecv; i++){ 3204 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 3205 } 3206 3207 ierr = PetscFree(len_si);CHKERRQ(ierr); 3208 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3209 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 3210 ierr = PetscFree(si_waits);CHKERRQ(ierr); 3211 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3212 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3213 ierr = PetscFree(status);CHKERRQ(ierr); 3214 3215 /* compute a local seq matrix in each processor */ 3216 /*----------------------------------------------*/ 3217 /* allocate bi array and free space for accumulating nonzero column info */ 3218 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3219 bi[0] = 0; 3220 3221 /* create and initialize a linked list */ 3222 nlnk = N+1; 3223 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3224 3225 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3226 len = 0; 3227 len = ai[owners[rank+1]] - ai[owners[rank]]; 3228 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3229 current_space = free_space; 3230 3231 /* determine symbolic info for each local row */ 3232 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3233 nextrow = buf_ri_k + merge->nrecv; 3234 nextai = nextrow + merge->nrecv; 3235 for (k=0; k<merge->nrecv; k++){ 3236 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3237 nrows = *buf_ri_k[k]; 3238 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 3239 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3240 } 3241 3242 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3243 len = 0; 3244 for (i=0;i<m;i++) { 3245 bnzi = 0; 3246 /* add local non-zero cols of this proc's seqmat into lnk */ 3247 arow = owners[rank] + i; 3248 anzi = ai[arow+1] - ai[arow]; 3249 aj = a->j + ai[arow]; 3250 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3251 bnzi += nlnk; 3252 /* add received col data into lnk */ 3253 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3254 if (i == *nextrow[k]) { /* i-th row */ 3255 anzi = *(nextai[k]+1) - *nextai[k]; 3256 aj = buf_rj[k] + *nextai[k]; 3257 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3258 bnzi += nlnk; 3259 nextrow[k]++; nextai[k]++; 3260 } 3261 } 3262 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3263 3264 /* if free space is not available, make more free space */ 3265 if (current_space->local_remaining<bnzi) { 3266 ierr = PetscFreeSpaceGet(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3267 nspacedouble++; 3268 } 3269 /* copy data into free space, then initialize lnk */ 3270 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3271 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3272 3273 current_space->array += bnzi; 3274 current_space->local_used += bnzi; 3275 current_space->local_remaining -= bnzi; 3276 3277 bi[i+1] = bi[i] + bnzi; 3278 } 3279 3280 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3281 3282 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3283 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3284 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3285 3286 /* create symbolic parallel matrix B_mpi */ 3287 /*---------------------------------------*/ 3288 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 3289 if (n==PETSC_DECIDE) { 3290 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 3291 } else { 3292 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3293 } 3294 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3295 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3296 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3297 3298 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3299 B_mpi->assembled = PETSC_FALSE; 3300 B_mpi->ops->destroy = MatDestroy_MPIAIJ_MatPtAP; 3301 merge->bi = bi; 3302 merge->bj = bj; 3303 merge->buf_ri = buf_ri; 3304 merge->buf_rj = buf_rj; 3305 merge->coi = PETSC_NULL; 3306 merge->coj = PETSC_NULL; 3307 merge->owners_co = PETSC_NULL; 3308 3309 /* attach the supporting struct to B_mpi for reuse */ 3310 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3311 ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr); 3312 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3313 *mpimat = B_mpi; 3314 3315 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 3316 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3317 PetscFunctionReturn(0); 3318 } 3319 3320 static PetscEvent logkey_seqstompi = 0; 3321 #undef __FUNCT__ 3322 #define __FUNCT__ "MatMerge_SeqsToMPI" 3323 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3324 { 3325 PetscErrorCode ierr; 3326 3327 PetscFunctionBegin; 3328 if (!logkey_seqstompi) { 3329 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3330 } 3331 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3332 if (scall == MAT_INITIAL_MATRIX){ 3333 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3334 } 3335 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3336 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3337 PetscFunctionReturn(0); 3338 } 3339 static PetscEvent logkey_getlocalmat = 0; 3340 #undef __FUNCT__ 3341 #define __FUNCT__ "MatGetLocalMat" 3342 /*@C 3343 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3344 3345 Not Collective 3346 3347 Input Parameters: 3348 + A - the matrix 3349 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3350 3351 Output Parameter: 3352 . A_loc - the local sequential matrix generated 3353 3354 Level: developer 3355 3356 @*/ 3357 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3358 { 3359 PetscErrorCode ierr; 3360 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3361 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3362 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3363 PetscScalar *aa=a->a,*ba=b->a,*ca; 3364 PetscInt am=A->m,i,j,k,cstart=mpimat->cstart; 3365 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3366 3367 PetscFunctionBegin; 3368 if (!logkey_getlocalmat) { 3369 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3370 } 3371 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3372 if (scall == MAT_INITIAL_MATRIX){ 3373 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 3374 ci[0] = 0; 3375 for (i=0; i<am; i++){ 3376 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3377 } 3378 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3379 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 3380 k = 0; 3381 for (i=0; i<am; i++) { 3382 ncols_o = bi[i+1] - bi[i]; 3383 ncols_d = ai[i+1] - ai[i]; 3384 /* off-diagonal portion of A */ 3385 for (jo=0; jo<ncols_o; jo++) { 3386 col = cmap[*bj]; 3387 if (col >= cstart) break; 3388 cj[k] = col; bj++; 3389 ca[k++] = *ba++; 3390 } 3391 /* diagonal portion of A */ 3392 for (j=0; j<ncols_d; j++) { 3393 cj[k] = cstart + *aj++; 3394 ca[k++] = *aa++; 3395 } 3396 /* off-diagonal portion of A */ 3397 for (j=jo; j<ncols_o; j++) { 3398 cj[k] = cmap[*bj++]; 3399 ca[k++] = *ba++; 3400 } 3401 } 3402 /* put together the new matrix */ 3403 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 3404 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3405 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3406 mat = (Mat_SeqAIJ*)(*A_loc)->data; 3407 mat->freedata = PETSC_TRUE; 3408 mat->nonew = 0; 3409 } else if (scall == MAT_REUSE_MATRIX){ 3410 mat=(Mat_SeqAIJ*)(*A_loc)->data; 3411 ci = mat->i; cj = mat->j; ca = mat->a; 3412 for (i=0; i<am; i++) { 3413 /* off-diagonal portion of A */ 3414 ncols_o = bi[i+1] - bi[i]; 3415 for (jo=0; jo<ncols_o; jo++) { 3416 col = cmap[*bj]; 3417 if (col >= cstart) break; 3418 *ca++ = *ba++; bj++; 3419 } 3420 /* diagonal portion of A */ 3421 ncols_d = ai[i+1] - ai[i]; 3422 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 3423 /* off-diagonal portion of A */ 3424 for (j=jo; j<ncols_o; j++) { 3425 *ca++ = *ba++; bj++; 3426 } 3427 } 3428 } else { 3429 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3430 } 3431 3432 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3433 PetscFunctionReturn(0); 3434 } 3435 3436 static PetscEvent logkey_getlocalmatcondensed = 0; 3437 #undef __FUNCT__ 3438 #define __FUNCT__ "MatGetLocalMatCondensed" 3439 /*@C 3440 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 3441 3442 Not Collective 3443 3444 Input Parameters: 3445 + A - the matrix 3446 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3447 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 3448 3449 Output Parameter: 3450 . A_loc - the local sequential matrix generated 3451 3452 Level: developer 3453 3454 @*/ 3455 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 3456 { 3457 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3458 PetscErrorCode ierr; 3459 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 3460 IS isrowa,iscola; 3461 Mat *aloc; 3462 3463 PetscFunctionBegin; 3464 if (!logkey_getlocalmatcondensed) { 3465 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE); 3466 } 3467 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3468 if (!row){ 3469 start = a->rstart; end = a->rend; 3470 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 3471 } else { 3472 isrowa = *row; 3473 } 3474 if (!col){ 3475 start = a->cstart; 3476 cmap = a->garray; 3477 nzA = a->A->n; 3478 nzB = a->B->n; 3479 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3480 ncols = 0; 3481 for (i=0; i<nzB; i++) { 3482 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3483 else break; 3484 } 3485 imark = i; 3486 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 3487 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 3488 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 3489 ierr = PetscFree(idx);CHKERRQ(ierr); 3490 } else { 3491 iscola = *col; 3492 } 3493 if (scall != MAT_INITIAL_MATRIX){ 3494 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 3495 aloc[0] = *A_loc; 3496 } 3497 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 3498 *A_loc = aloc[0]; 3499 ierr = PetscFree(aloc);CHKERRQ(ierr); 3500 if (!row){ 3501 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 3502 } 3503 if (!col){ 3504 ierr = ISDestroy(iscola);CHKERRQ(ierr); 3505 } 3506 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3507 PetscFunctionReturn(0); 3508 } 3509 3510 static PetscEvent logkey_GetBrowsOfAcols = 0; 3511 #undef __FUNCT__ 3512 #define __FUNCT__ "MatGetBrowsOfAcols" 3513 /*@C 3514 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 3515 3516 Collective on Mat 3517 3518 Input Parameters: 3519 + A,B - the matrices in mpiaij format 3520 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3521 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 3522 3523 Output Parameter: 3524 + rowb, colb - index sets of rows and columns of B to extract 3525 . brstart - row index of B_seq from which next B->m rows are taken from B's local rows 3526 - B_seq - the sequential matrix generated 3527 3528 Level: developer 3529 3530 @*/ 3531 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 3532 { 3533 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3534 PetscErrorCode ierr; 3535 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 3536 IS isrowb,iscolb; 3537 Mat *bseq; 3538 3539 PetscFunctionBegin; 3540 if (a->cstart != b->rstart || a->cend != b->rend){ 3541 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend); 3542 } 3543 if (!logkey_GetBrowsOfAcols) { 3544 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 3545 } 3546 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3547 3548 if (scall == MAT_INITIAL_MATRIX){ 3549 start = a->cstart; 3550 cmap = a->garray; 3551 nzA = a->A->n; 3552 nzB = a->B->n; 3553 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3554 ncols = 0; 3555 for (i=0; i<nzB; i++) { /* row < local row index */ 3556 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3557 else break; 3558 } 3559 imark = i; 3560 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 3561 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 3562 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 3563 ierr = PetscFree(idx);CHKERRQ(ierr); 3564 *brstart = imark; 3565 ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr); 3566 } else { 3567 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 3568 isrowb = *rowb; iscolb = *colb; 3569 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 3570 bseq[0] = *B_seq; 3571 } 3572 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 3573 *B_seq = bseq[0]; 3574 ierr = PetscFree(bseq);CHKERRQ(ierr); 3575 if (!rowb){ 3576 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 3577 } else { 3578 *rowb = isrowb; 3579 } 3580 if (!colb){ 3581 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 3582 } else { 3583 *colb = iscolb; 3584 } 3585 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3586 PetscFunctionReturn(0); 3587 } 3588 3589 static PetscEvent logkey_GetBrowsOfAocols = 0; 3590 #undef __FUNCT__ 3591 #define __FUNCT__ "MatGetBrowsOfAoCols" 3592 /*@C 3593 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 3594 of the OFF-DIAGONAL portion of local A 3595 3596 Collective on Mat 3597 3598 Input Parameters: 3599 + A,B - the matrices in mpiaij format 3600 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3601 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 3602 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 3603 3604 Output Parameter: 3605 + B_oth - the sequential matrix generated 3606 3607 Level: developer 3608 3609 @*/ 3610 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 3611 { 3612 VecScatter_MPI_General *gen_to,*gen_from; 3613 PetscErrorCode ierr; 3614 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3615 Mat_SeqAIJ *b_oth; 3616 VecScatter ctx=a->Mvctx; 3617 MPI_Comm comm=ctx->comm; 3618 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank; 3619 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj; 3620 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 3621 PetscInt i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 3622 MPI_Request *rwaits,*swaits; 3623 MPI_Status *sstatus,rstatus; 3624 PetscInt *cols; 3625 PetscScalar *vals; 3626 PetscMPIInt j; 3627 3628 PetscFunctionBegin; 3629 if (a->cstart != b->rstart || a->cend != b->rend){ 3630 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend); 3631 } 3632 if (!logkey_GetBrowsOfAocols) { 3633 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE); 3634 } 3635 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3636 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3637 3638 gen_to = (VecScatter_MPI_General*)ctx->todata; 3639 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 3640 rvalues = gen_from->values; /* holds the length of sending row */ 3641 svalues = gen_to->values; /* holds the length of receiving row */ 3642 nrecvs = gen_from->n; 3643 nsends = gen_to->n; 3644 rwaits = gen_from->requests; 3645 swaits = gen_to->requests; 3646 srow = gen_to->indices; /* local row index to be sent */ 3647 rstarts = gen_from->starts; 3648 sstarts = gen_to->starts; 3649 rprocs = gen_from->procs; 3650 sprocs = gen_to->procs; 3651 sstatus = gen_to->sstatus; 3652 3653 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 3654 if (scall == MAT_INITIAL_MATRIX){ 3655 /* i-array */ 3656 /*---------*/ 3657 /* post receives */ 3658 for (i=0; i<nrecvs; i++){ 3659 rowlen = (PetscInt*)rvalues + rstarts[i]; 3660 nrows = rstarts[i+1]-rstarts[i]; 3661 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3662 } 3663 3664 /* pack the outgoing message */ 3665 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 3666 rstartsj = sstartsj + nsends +1; 3667 sstartsj[0] = 0; rstartsj[0] = 0; 3668 len = 0; /* total length of j or a array to be sent */ 3669 k = 0; 3670 for (i=0; i<nsends; i++){ 3671 rowlen = (PetscInt*)svalues + sstarts[i]; 3672 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3673 for (j=0; j<nrows; j++) { 3674 row = srow[k] + b->rowners[rank]; /* global row idx */ 3675 ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 3676 len += rowlen[j]; 3677 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 3678 k++; 3679 } 3680 ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3681 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 3682 } 3683 /* recvs and sends of i-array are completed */ 3684 i = nrecvs; 3685 while (i--) { 3686 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3687 } 3688 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3689 /* allocate buffers for sending j and a arrays */ 3690 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 3691 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 3692 3693 /* create i-array of B_oth */ 3694 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 3695 b_othi[0] = 0; 3696 len = 0; /* total length of j or a array to be received */ 3697 k = 0; 3698 for (i=0; i<nrecvs; i++){ 3699 rowlen = (PetscInt*)rvalues + rstarts[i]; 3700 nrows = rstarts[i+1]-rstarts[i]; 3701 for (j=0; j<nrows; j++) { 3702 b_othi[k+1] = b_othi[k] + rowlen[j]; 3703 len += rowlen[j]; k++; 3704 } 3705 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 3706 } 3707 3708 /* allocate space for j and a arrrays of B_oth */ 3709 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 3710 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr); 3711 3712 /* j-array */ 3713 /*---------*/ 3714 /* post receives of j-array */ 3715 for (i=0; i<nrecvs; i++){ 3716 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3717 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3718 } 3719 k = 0; 3720 for (i=0; i<nsends; i++){ 3721 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3722 bufJ = bufj+sstartsj[i]; 3723 for (j=0; j<nrows; j++) { 3724 row = srow[k++] + b->rowners[rank]; /* global row idx */ 3725 ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3726 for (l=0; l<ncols; l++){ 3727 *bufJ++ = cols[l]; 3728 } 3729 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3730 } 3731 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3732 } 3733 3734 /* recvs and sends of j-array are completed */ 3735 i = nrecvs; 3736 while (i--) { 3737 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3738 } 3739 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3740 } else if (scall == MAT_REUSE_MATRIX){ 3741 sstartsj = *startsj; 3742 rstartsj = sstartsj + nsends +1; 3743 bufa = *bufa_ptr; 3744 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 3745 b_otha = b_oth->a; 3746 } else { 3747 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 3748 } 3749 3750 /* a-array */ 3751 /*---------*/ 3752 /* post receives of a-array */ 3753 for (i=0; i<nrecvs; i++){ 3754 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3755 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3756 } 3757 k = 0; 3758 for (i=0; i<nsends; i++){ 3759 nrows = sstarts[i+1]-sstarts[i]; 3760 bufA = bufa+sstartsj[i]; 3761 for (j=0; j<nrows; j++) { 3762 row = srow[k++] + b->rowners[rank]; /* global row idx */ 3763 ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3764 for (l=0; l<ncols; l++){ 3765 *bufA++ = vals[l]; 3766 } 3767 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3768 3769 } 3770 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3771 } 3772 /* recvs and sends of a-array are completed */ 3773 i = nrecvs; 3774 while (i--) { 3775 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3776 } 3777 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3778 3779 if (scall == MAT_INITIAL_MATRIX){ 3780 /* put together the new matrix */ 3781 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 3782 3783 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3784 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3785 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 3786 b_oth->freedata = PETSC_TRUE; 3787 b_oth->nonew = 0; 3788 3789 ierr = PetscFree(bufj);CHKERRQ(ierr); 3790 if (!startsj || !bufa_ptr){ 3791 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 3792 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 3793 } else { 3794 *startsj = sstartsj; 3795 *bufa_ptr = bufa; 3796 } 3797 } 3798 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3799 3800 PetscFunctionReturn(0); 3801 } 3802 3803 #undef __FUNCT__ 3804 #define __FUNCT__ "MatGetCommunicationStructs" 3805 /*@C 3806 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 3807 3808 Not Collective 3809 3810 Input Parameters: 3811 . A - The matrix in mpiaij format 3812 3813 Output Parameter: 3814 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 3815 . colmap - A map from global column index to local index into lvec 3816 - multScatter - A scatter from the argument of a matrix-vector product to lvec 3817 3818 Level: developer 3819 3820 @*/ 3821 #if defined (PETSC_USE_CTABLE) 3822 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 3823 #else 3824 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 3825 #endif 3826 { 3827 Mat_MPIAIJ *a; 3828 3829 PetscFunctionBegin; 3830 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 3831 PetscValidPointer(lvec, 2) 3832 PetscValidPointer(colmap, 3) 3833 PetscValidPointer(multScatter, 4) 3834 a = (Mat_MPIAIJ *) A->data; 3835 if (lvec) *lvec = a->lvec; 3836 if (colmap) *colmap = a->colmap; 3837 if (multScatter) *multScatter = a->Mvctx; 3838 PetscFunctionReturn(0); 3839 } 3840 3841 /*MC 3842 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 3843 3844 Options Database Keys: 3845 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 3846 3847 Level: beginner 3848 3849 .seealso: MatCreateMPIAIJ 3850 M*/ 3851 3852 EXTERN_C_BEGIN 3853 #undef __FUNCT__ 3854 #define __FUNCT__ "MatCreate_MPIAIJ" 3855 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 3856 { 3857 Mat_MPIAIJ *b; 3858 PetscErrorCode ierr; 3859 PetscInt i; 3860 PetscMPIInt size; 3861 3862 PetscFunctionBegin; 3863 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 3864 3865 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 3866 B->data = (void*)b; 3867 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3868 B->factor = 0; 3869 B->bs = 1; 3870 B->assembled = PETSC_FALSE; 3871 B->mapping = 0; 3872 3873 B->insertmode = NOT_SET_VALUES; 3874 b->size = size; 3875 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 3876 3877 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 3878 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 3879 3880 /* the information in the maps duplicates the information computed below, eventually 3881 we should remove the duplicate information that is not contained in the maps */ 3882 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 3883 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 3884 3885 /* build local table of row and column ownerships */ 3886 ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 3887 ierr = PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 3888 b->cowners = b->rowners + b->size + 2; 3889 ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3890 b->rowners[0] = 0; 3891 for (i=2; i<=b->size; i++) { 3892 b->rowners[i] += b->rowners[i-1]; 3893 } 3894 b->rstart = b->rowners[b->rank]; 3895 b->rend = b->rowners[b->rank+1]; 3896 ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3897 b->cowners[0] = 0; 3898 for (i=2; i<=b->size; i++) { 3899 b->cowners[i] += b->cowners[i-1]; 3900 } 3901 b->cstart = b->cowners[b->rank]; 3902 b->cend = b->cowners[b->rank+1]; 3903 3904 /* build cache for off array entries formed */ 3905 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 3906 b->donotstash = PETSC_FALSE; 3907 b->colmap = 0; 3908 b->garray = 0; 3909 b->roworiented = PETSC_TRUE; 3910 3911 /* stuff used for matrix vector multiply */ 3912 b->lvec = PETSC_NULL; 3913 b->Mvctx = PETSC_NULL; 3914 3915 /* stuff for MatGetRow() */ 3916 b->rowindices = 0; 3917 b->rowvalues = 0; 3918 b->getrowactive = PETSC_FALSE; 3919 3920 /* Explicitly create 2 MATSEQAIJ matrices. */ 3921 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3922 ierr = MatSetSizes(b->A,B->m,B->n,B->m,B->n);CHKERRQ(ierr); 3923 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3924 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3925 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3926 ierr = MatSetSizes(b->B,B->m,B->N,B->m,B->N);CHKERRQ(ierr); 3927 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3928 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3929 3930 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3931 "MatStoreValues_MPIAIJ", 3932 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 3933 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3934 "MatRetrieveValues_MPIAIJ", 3935 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 3936 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3937 "MatGetDiagonalBlock_MPIAIJ", 3938 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 3939 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3940 "MatIsTranspose_MPIAIJ", 3941 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 3942 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 3943 "MatMPIAIJSetPreallocation_MPIAIJ", 3944 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 3945 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 3946 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 3947 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 3948 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3949 "MatDiagonalScaleLocal_MPIAIJ", 3950 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 3951 PetscFunctionReturn(0); 3952 } 3953 EXTERN_C_END 3954 3955 /* 3956 Special version for direct calls from Fortran 3957 */ 3958 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3959 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 3960 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3961 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 3962 #endif 3963 3964 /* Change these macros so can be used in void function */ 3965 #undef CHKERRQ 3966 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr) 3967 #undef SETERRQ2 3968 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr) 3969 #undef SETERRQ 3970 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr) 3971 3972 EXTERN_C_BEGIN 3973 #undef __FUNCT__ 3974 #define __FUNCT__ "matsetvaluesmpiaij_" 3975 void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv) 3976 { 3977 Mat mat = *mmat; 3978 PetscInt m = *mm, n = *mn; 3979 InsertMode addv = *maddv; 3980 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3981 PetscScalar value; 3982 PetscErrorCode ierr; 3983 MatPreallocated(mat); 3984 if (mat->insertmode == NOT_SET_VALUES) { 3985 mat->insertmode = addv; 3986 } 3987 #if defined(PETSC_USE_DEBUG) 3988 else if (mat->insertmode != addv) { 3989 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3990 } 3991 #endif 3992 { 3993 PetscInt i,j,rstart = aij->rstart,rend = aij->rend; 3994 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 3995 PetscTruth roworiented = aij->roworiented; 3996 3997 /* Some Variables required in the macro */ 3998 Mat A = aij->A; 3999 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 4000 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 4001 PetscScalar *aa = a->a; 4002 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 4003 Mat B = aij->B; 4004 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 4005 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 4006 PetscScalar *ba = b->a; 4007 4008 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 4009 PetscInt nonew = a->nonew; 4010 PetscScalar *ap1,*ap2; 4011 4012 PetscFunctionBegin; 4013 for (i=0; i<m; i++) { 4014 if (im[i] < 0) continue; 4015 #if defined(PETSC_USE_DEBUG) 4016 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 4017 #endif 4018 if (im[i] >= rstart && im[i] < rend) { 4019 row = im[i] - rstart; 4020 lastcol1 = -1; 4021 rp1 = aj + ai[row]; 4022 ap1 = aa + ai[row]; 4023 rmax1 = aimax[row]; 4024 nrow1 = ailen[row]; 4025 low1 = 0; 4026 high1 = nrow1; 4027 lastcol2 = -1; 4028 rp2 = bj + bi[row]; 4029 ap2 = ba + bi[row]; 4030 rmax2 = bimax[row]; 4031 nrow2 = bilen[row]; 4032 low2 = 0; 4033 high2 = nrow2; 4034 4035 for (j=0; j<n; j++) { 4036 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 4037 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 4038 if (in[j] >= cstart && in[j] < cend){ 4039 col = in[j] - cstart; 4040 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 4041 } else if (in[j] < 0) continue; 4042 #if defined(PETSC_USE_DEBUG) 4043 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);} 4044 #endif 4045 else { 4046 if (mat->was_assembled) { 4047 if (!aij->colmap) { 4048 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 4049 } 4050 #if defined (PETSC_USE_CTABLE) 4051 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4052 col--; 4053 #else 4054 col = aij->colmap[in[j]] - 1; 4055 #endif 4056 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 4057 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 4058 col = in[j]; 4059 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 4060 B = aij->B; 4061 b = (Mat_SeqAIJ*)B->data; 4062 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 4063 rp2 = bj + bi[row]; 4064 ap2 = ba + bi[row]; 4065 rmax2 = bimax[row]; 4066 nrow2 = bilen[row]; 4067 low2 = 0; 4068 high2 = nrow2; 4069 bm = aij->B->m; 4070 ba = b->a; 4071 } 4072 } else col = in[j]; 4073 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 4074 } 4075 } 4076 } else { 4077 if (!aij->donotstash) { 4078 if (roworiented) { 4079 if (ignorezeroentries && v[i*n] == 0.0) continue; 4080 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 4081 } else { 4082 if (ignorezeroentries && v[i] == 0.0) continue; 4083 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 4084 } 4085 } 4086 } 4087 }} 4088 PetscFunctionReturnVoid(); 4089 } 4090 EXTERN_C_END 4091