1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" /*I "petscmat.h" I*/ 4 #include "src/inline/spops.h" 5 6 /* 7 Local utility routine that creates a mapping from the global column 8 number to the local number in the off-diagonal part of the local 9 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 10 a slightly higher hash table cost; without it it is not scalable (each processor 11 has an order N integer array but is fast to acess. 12 */ 13 #undef __FUNCT__ 14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 16 { 17 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 18 PetscErrorCode ierr; 19 PetscInt n = aij->B->n,i; 20 21 PetscFunctionBegin; 22 #if defined (PETSC_USE_CTABLE) 23 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 24 for (i=0; i<n; i++){ 25 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 26 } 27 #else 28 ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 29 ierr = PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));CHKERRQ(ierr); 30 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr); 31 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 32 #endif 33 PetscFunctionReturn(0); 34 } 35 36 37 #define CHUNKSIZE 15 38 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 39 { \ 40 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 41 lastcol1 = col;\ 42 while (high1-low1 > 5) { \ 43 t = (low1+high1)/2; \ 44 if (rp1[t] > col) high1 = t; \ 45 else low1 = t; \ 46 } \ 47 for (_i=low1; _i<high1; _i++) { \ 48 if (rp1[_i] > col) break; \ 49 if (rp1[_i] == col) { \ 50 if (addv == ADD_VALUES) ap1[_i] += value; \ 51 else ap1[_i] = value; \ 52 goto a_noinsert; \ 53 } \ 54 } \ 55 if (value == 0.0 && ignorezeroentries) goto a_noinsert; \ 56 if (nonew == 1) goto a_noinsert; \ 57 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 58 MatSeqXAIJReallocateAIJ(a,1,nrow1,row,col,rmax1,aa,ai,aj,am,rp1,ap1,aimax,nonew); \ 59 N = nrow1++ - 1; a->nz++; high1++; \ 60 /* shift up all the later entries in this row */ \ 61 for (ii=N; ii>=_i; ii--) { \ 62 rp1[ii+1] = rp1[ii]; \ 63 ap1[ii+1] = ap1[ii]; \ 64 } \ 65 rp1[_i] = col; \ 66 ap1[_i] = value; \ 67 a_noinsert: ; \ 68 ailen[row] = nrow1; \ 69 } 70 71 72 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 73 { \ 74 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 75 lastcol2 = col;\ 76 while (high2-low2 > 5) { \ 77 t = (low2+high2)/2; \ 78 if (rp2[t] > col) high2 = t; \ 79 else low2 = t; \ 80 } \ 81 for (_i=low2; _i<high2; _i++) { \ 82 if (rp2[_i] > col) break; \ 83 if (rp2[_i] == col) { \ 84 if (addv == ADD_VALUES) ap2[_i] += value; \ 85 else ap2[_i] = value; \ 86 goto b_noinsert; \ 87 } \ 88 } \ 89 if (value == 0.0 && ignorezeroentries) goto b_noinsert; \ 90 if (nonew == 1) goto b_noinsert; \ 91 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 92 MatSeqXAIJReallocateAIJ(b,1,nrow2,row,col,rmax2,ba,bi,bj,bm,rp2,ap2,bimax,nonew); \ 93 N = nrow2++ - 1; b->nz++; high2++;\ 94 /* shift up all the later entries in this row */ \ 95 for (ii=N; ii>=_i; ii--) { \ 96 rp2[ii+1] = rp2[ii]; \ 97 ap2[ii+1] = ap2[ii]; \ 98 } \ 99 rp2[_i] = col; \ 100 ap2[_i] = value; \ 101 b_noinsert: ; \ 102 bilen[row] = nrow2; \ 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 107 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 108 { 109 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 110 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 111 PetscErrorCode ierr; 112 PetscInt l,*garray = mat->garray,diag; 113 114 PetscFunctionBegin; 115 /* code only works for square matrices A */ 116 117 /* find size of row to the left of the diagonal part */ 118 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 119 row = row - diag; 120 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 121 if (garray[b->j[b->i[row]+l]] > diag) break; 122 } 123 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 124 125 /* diagonal part */ 126 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 127 128 /* right of diagonal part */ 129 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 130 PetscFunctionReturn(0); 131 } 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "MatSetValues_MPIAIJ" 135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 136 { 137 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 138 PetscScalar value; 139 PetscErrorCode ierr; 140 PetscInt i,j,rstart = aij->rstart,rend = aij->rend; 141 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 142 PetscTruth roworiented = aij->roworiented; 143 144 /* Some Variables required in the macro */ 145 Mat A = aij->A; 146 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 147 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 148 PetscScalar *aa = a->a; 149 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 150 Mat B = aij->B; 151 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 152 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 153 PetscScalar *ba = b->a; 154 155 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 156 PetscInt nonew = a->nonew; 157 PetscScalar *ap1,*ap2; 158 159 PetscFunctionBegin; 160 for (i=0; i<m; i++) { 161 if (im[i] < 0) continue; 162 #if defined(PETSC_USE_DEBUG) 163 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 164 #endif 165 if (im[i] >= rstart && im[i] < rend) { 166 row = im[i] - rstart; 167 lastcol1 = -1; 168 rp1 = aj + ai[row]; 169 ap1 = aa + ai[row]; 170 rmax1 = aimax[row]; 171 nrow1 = ailen[row]; 172 low1 = 0; 173 high1 = nrow1; 174 lastcol2 = -1; 175 rp2 = bj + bi[row]; 176 ap2 = ba + bi[row]; 177 rmax2 = bimax[row]; 178 nrow2 = bilen[row]; 179 low2 = 0; 180 high2 = nrow2; 181 182 for (j=0; j<n; j++) { 183 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 184 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 185 if (in[j] >= cstart && in[j] < cend){ 186 col = in[j] - cstart; 187 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 188 } else if (in[j] < 0) continue; 189 #if defined(PETSC_USE_DEBUG) 190 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);} 191 #endif 192 else { 193 if (mat->was_assembled) { 194 if (!aij->colmap) { 195 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 196 } 197 #if defined (PETSC_USE_CTABLE) 198 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 199 col--; 200 #else 201 col = aij->colmap[in[j]] - 1; 202 #endif 203 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 204 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 205 col = in[j]; 206 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 207 B = aij->B; 208 b = (Mat_SeqAIJ*)B->data; 209 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 210 rp2 = bj + bi[row]; 211 ap2 = ba + bi[row]; 212 rmax2 = bimax[row]; 213 nrow2 = bilen[row]; 214 low2 = 0; 215 high2 = nrow2; 216 bm = aij->B->m; 217 ba = b->a; 218 } 219 } else col = in[j]; 220 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 221 } 222 } 223 } else { 224 if (!aij->donotstash) { 225 if (roworiented) { 226 if (ignorezeroentries && v[i*n] == 0.0) continue; 227 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 228 } else { 229 if (ignorezeroentries && v[i] == 0.0) continue; 230 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 231 } 232 } 233 } 234 } 235 PetscFunctionReturn(0); 236 } 237 238 239 #undef __FUNCT__ 240 #define __FUNCT__ "MatGetValues_MPIAIJ" 241 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 242 { 243 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 244 PetscErrorCode ierr; 245 PetscInt i,j,rstart = aij->rstart,rend = aij->rend; 246 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 247 248 PetscFunctionBegin; 249 for (i=0; i<m; i++) { 250 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 251 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1); 252 if (idxm[i] >= rstart && idxm[i] < rend) { 253 row = idxm[i] - rstart; 254 for (j=0; j<n; j++) { 255 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 256 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1); 257 if (idxn[j] >= cstart && idxn[j] < cend){ 258 col = idxn[j] - cstart; 259 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 260 } else { 261 if (!aij->colmap) { 262 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 263 } 264 #if defined (PETSC_USE_CTABLE) 265 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 266 col --; 267 #else 268 col = aij->colmap[idxn[j]] - 1; 269 #endif 270 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 271 else { 272 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 273 } 274 } 275 } 276 } else { 277 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 278 } 279 } 280 PetscFunctionReturn(0); 281 } 282 283 #undef __FUNCT__ 284 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 285 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 286 { 287 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 288 PetscErrorCode ierr; 289 PetscInt nstash,reallocs; 290 InsertMode addv; 291 292 PetscFunctionBegin; 293 if (aij->donotstash) { 294 PetscFunctionReturn(0); 295 } 296 297 /* make sure all processors are either in INSERTMODE or ADDMODE */ 298 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 299 if (addv == (ADD_VALUES|INSERT_VALUES)) { 300 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 301 } 302 mat->insertmode = addv; /* in case this processor had no cache */ 303 304 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 305 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 306 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 307 PetscFunctionReturn(0); 308 } 309 310 #undef __FUNCT__ 311 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 312 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 313 { 314 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 315 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 316 PetscErrorCode ierr; 317 PetscMPIInt n; 318 PetscInt i,j,rstart,ncols,flg; 319 PetscInt *row,*col,other_disassembled; 320 PetscScalar *val; 321 InsertMode addv = mat->insertmode; 322 323 PetscFunctionBegin; 324 if (!aij->donotstash) { 325 while (1) { 326 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 327 if (!flg) break; 328 329 for (i=0; i<n;) { 330 /* Now identify the consecutive vals belonging to the same row */ 331 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 332 if (j < n) ncols = j-i; 333 else ncols = n-i; 334 /* Now assemble all these values with a single function call */ 335 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 336 i = j; 337 } 338 } 339 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 340 } 341 a->compressedrow.use = PETSC_FALSE; 342 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 343 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 344 345 /* determine if any processor has disassembled, if so we must 346 also disassemble ourselfs, in order that we may reassemble. */ 347 /* 348 if nonzero structure of submatrix B cannot change then we know that 349 no processor disassembled thus we can skip this stuff 350 */ 351 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 352 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 353 if (mat->was_assembled && !other_disassembled) { 354 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 355 } 356 } 357 /* reaccess the b because aij->B was changed in MatSetValues() or DisAssemble() */ 358 b = (Mat_SeqAIJ *)aij->B->data; 359 360 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 361 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 362 } 363 ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr); 364 b->compressedrow.use = PETSC_TRUE; 365 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 366 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 367 368 if (aij->rowvalues) { 369 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 370 aij->rowvalues = 0; 371 } 372 373 /* used by MatAXPY() */ 374 a->xtoy = 0; b->xtoy = 0; 375 a->XtoY = 0; b->XtoY = 0; 376 377 PetscFunctionReturn(0); 378 } 379 380 #undef __FUNCT__ 381 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 382 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 383 { 384 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 385 PetscErrorCode ierr; 386 387 PetscFunctionBegin; 388 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 389 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 390 PetscFunctionReturn(0); 391 } 392 393 #undef __FUNCT__ 394 #define __FUNCT__ "MatZeroRows_MPIAIJ" 395 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag) 396 { 397 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 398 PetscErrorCode ierr; 399 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1; 400 PetscInt i,*owners = l->rowners; 401 PetscInt *nprocs,j,idx,nsends,row; 402 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 403 PetscInt *rvalues,count,base,slen,*source; 404 PetscInt *lens,*lrows,*values,rstart=l->rstart; 405 MPI_Comm comm = A->comm; 406 MPI_Request *send_waits,*recv_waits; 407 MPI_Status recv_status,*send_status; 408 #if defined(PETSC_DEBUG) 409 PetscTruth found = PETSC_FALSE; 410 #endif 411 412 PetscFunctionBegin; 413 /* first count number of contributors to each processor */ 414 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 415 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 416 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 417 j = 0; 418 for (i=0; i<N; i++) { 419 if (lastidx > (idx = rows[i])) j = 0; 420 lastidx = idx; 421 for (; j<size; j++) { 422 if (idx >= owners[j] && idx < owners[j+1]) { 423 nprocs[2*j]++; 424 nprocs[2*j+1] = 1; 425 owner[i] = j; 426 #if defined(PETSC_DEBUG) 427 found = PETSC_TRUE; 428 #endif 429 break; 430 } 431 } 432 #if defined(PETSC_DEBUG) 433 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 434 found = PETSC_FALSE; 435 #endif 436 } 437 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 438 439 /* inform other processors of number of messages and max length*/ 440 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 441 442 /* post receives: */ 443 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 444 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 445 for (i=0; i<nrecvs; i++) { 446 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 447 } 448 449 /* do sends: 450 1) starts[i] gives the starting index in svalues for stuff going to 451 the ith processor 452 */ 453 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 454 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 455 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 456 starts[0] = 0; 457 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 458 for (i=0; i<N; i++) { 459 svalues[starts[owner[i]]++] = rows[i]; 460 } 461 462 starts[0] = 0; 463 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 464 count = 0; 465 for (i=0; i<size; i++) { 466 if (nprocs[2*i+1]) { 467 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 468 } 469 } 470 ierr = PetscFree(starts);CHKERRQ(ierr); 471 472 base = owners[rank]; 473 474 /* wait on receives */ 475 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 476 source = lens + nrecvs; 477 count = nrecvs; slen = 0; 478 while (count) { 479 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 480 /* unpack receives into our local space */ 481 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 482 source[imdex] = recv_status.MPI_SOURCE; 483 lens[imdex] = n; 484 slen += n; 485 count--; 486 } 487 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 488 489 /* move the data into the send scatter */ 490 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 491 count = 0; 492 for (i=0; i<nrecvs; i++) { 493 values = rvalues + i*nmax; 494 for (j=0; j<lens[i]; j++) { 495 lrows[count++] = values[j] - base; 496 } 497 } 498 ierr = PetscFree(rvalues);CHKERRQ(ierr); 499 ierr = PetscFree(lens);CHKERRQ(ierr); 500 ierr = PetscFree(owner);CHKERRQ(ierr); 501 ierr = PetscFree(nprocs);CHKERRQ(ierr); 502 503 /* actually zap the local rows */ 504 /* 505 Zero the required rows. If the "diagonal block" of the matrix 506 is square and the user wishes to set the diagonal we use separate 507 code so that MatSetValues() is not called for each diagonal allocating 508 new memory, thus calling lots of mallocs and slowing things down. 509 510 Contributed by: Matthew Knepley 511 */ 512 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 513 ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr); 514 if ((diag != 0.0) && (l->A->M == l->A->N)) { 515 ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr); 516 } else if (diag != 0.0) { 517 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 518 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 519 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 520 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 521 } 522 for (i = 0; i < slen; i++) { 523 row = lrows[i] + rstart; 524 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 525 } 526 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 527 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 528 } else { 529 ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr); 530 } 531 ierr = PetscFree(lrows);CHKERRQ(ierr); 532 533 /* wait on sends */ 534 if (nsends) { 535 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 536 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 537 ierr = PetscFree(send_status);CHKERRQ(ierr); 538 } 539 ierr = PetscFree(send_waits);CHKERRQ(ierr); 540 ierr = PetscFree(svalues);CHKERRQ(ierr); 541 542 PetscFunctionReturn(0); 543 } 544 545 #undef __FUNCT__ 546 #define __FUNCT__ "MatMult_MPIAIJ" 547 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 548 { 549 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 550 PetscErrorCode ierr; 551 PetscInt nt; 552 553 PetscFunctionBegin; 554 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 555 if (nt != A->n) { 556 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt); 557 } 558 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 559 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 560 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 561 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 562 PetscFunctionReturn(0); 563 } 564 565 #undef __FUNCT__ 566 #define __FUNCT__ "MatMultAdd_MPIAIJ" 567 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 568 { 569 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 570 PetscErrorCode ierr; 571 572 PetscFunctionBegin; 573 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 574 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 575 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 576 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 577 PetscFunctionReturn(0); 578 } 579 580 #undef __FUNCT__ 581 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 582 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 583 { 584 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 585 PetscErrorCode ierr; 586 PetscTruth merged; 587 588 PetscFunctionBegin; 589 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 590 /* do nondiagonal part */ 591 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 592 if (!merged) { 593 /* send it on its way */ 594 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 595 /* do local part */ 596 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 597 /* receive remote parts: note this assumes the values are not actually */ 598 /* added in yy until the next line, */ 599 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 600 } else { 601 /* do local part */ 602 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 603 /* send it on its way */ 604 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 605 /* values actually were received in the Begin() but we need to call this nop */ 606 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 607 } 608 PetscFunctionReturn(0); 609 } 610 611 EXTERN_C_BEGIN 612 #undef __FUNCT__ 613 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 614 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f) 615 { 616 MPI_Comm comm; 617 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 618 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 619 IS Me,Notme; 620 PetscErrorCode ierr; 621 PetscInt M,N,first,last,*notme,i; 622 PetscMPIInt size; 623 624 PetscFunctionBegin; 625 626 /* Easy test: symmetric diagonal block */ 627 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 628 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 629 if (!*f) PetscFunctionReturn(0); 630 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 631 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 632 if (size == 1) PetscFunctionReturn(0); 633 634 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 635 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 636 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 637 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 638 for (i=0; i<first; i++) notme[i] = i; 639 for (i=last; i<M; i++) notme[i-last+first] = i; 640 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 641 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 642 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 643 Aoff = Aoffs[0]; 644 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 645 Boff = Boffs[0]; 646 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 647 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 648 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 649 ierr = ISDestroy(Me);CHKERRQ(ierr); 650 ierr = ISDestroy(Notme);CHKERRQ(ierr); 651 652 PetscFunctionReturn(0); 653 } 654 EXTERN_C_END 655 656 #undef __FUNCT__ 657 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 658 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 659 { 660 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 661 PetscErrorCode ierr; 662 663 PetscFunctionBegin; 664 /* do nondiagonal part */ 665 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 666 /* send it on its way */ 667 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 668 /* do local part */ 669 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 670 /* receive remote parts */ 671 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 672 PetscFunctionReturn(0); 673 } 674 675 /* 676 This only works correctly for square matrices where the subblock A->A is the 677 diagonal block 678 */ 679 #undef __FUNCT__ 680 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 681 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 682 { 683 PetscErrorCode ierr; 684 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 685 686 PetscFunctionBegin; 687 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 688 if (a->rstart != a->cstart || a->rend != a->cend) { 689 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 690 } 691 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 692 PetscFunctionReturn(0); 693 } 694 695 #undef __FUNCT__ 696 #define __FUNCT__ "MatScale_MPIAIJ" 697 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 698 { 699 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 700 PetscErrorCode ierr; 701 702 PetscFunctionBegin; 703 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 704 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 705 PetscFunctionReturn(0); 706 } 707 708 #undef __FUNCT__ 709 #define __FUNCT__ "MatDestroy_MPIAIJ" 710 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 711 { 712 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 713 PetscErrorCode ierr; 714 715 PetscFunctionBegin; 716 #if defined(PETSC_USE_LOG) 717 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N); 718 #endif 719 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 720 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 721 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 722 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 723 #if defined (PETSC_USE_CTABLE) 724 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 725 #else 726 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 727 #endif 728 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 729 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 730 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 731 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 732 ierr = PetscFree(aij);CHKERRQ(ierr); 733 734 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 735 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 736 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 737 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 738 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 739 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 740 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 741 PetscFunctionReturn(0); 742 } 743 744 #undef __FUNCT__ 745 #define __FUNCT__ "MatView_MPIAIJ_Binary" 746 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 747 { 748 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 749 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 750 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 751 PetscErrorCode ierr; 752 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 753 int fd; 754 PetscInt nz,header[4],*row_lengths,*range,rlen,i; 755 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 756 PetscScalar *column_values; 757 758 PetscFunctionBegin; 759 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 760 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 761 nz = A->nz + B->nz; 762 if (!rank) { 763 header[0] = MAT_FILE_COOKIE; 764 header[1] = mat->M; 765 header[2] = mat->N; 766 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 767 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 768 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 769 /* get largest number of rows any processor has */ 770 rlen = mat->m; 771 range = mat->rmap.range; 772 for (i=1; i<size; i++) { 773 rlen = PetscMax(rlen,range[i+1] - range[i]); 774 } 775 } else { 776 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 777 rlen = mat->m; 778 } 779 780 /* load up the local row counts */ 781 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 782 for (i=0; i<mat->m; i++) { 783 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 784 } 785 786 /* store the row lengths to the file */ 787 if (!rank) { 788 MPI_Status status; 789 ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 790 for (i=1; i<size; i++) { 791 rlen = range[i+1] - range[i]; 792 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 793 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 794 } 795 } else { 796 ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 797 } 798 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 799 800 /* load up the local column indices */ 801 nzmax = nz; /* )th processor needs space a largest processor needs */ 802 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 803 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 804 cnt = 0; 805 for (i=0; i<mat->m; i++) { 806 for (j=B->i[i]; j<B->i[i+1]; j++) { 807 if ( (col = garray[B->j[j]]) > cstart) break; 808 column_indices[cnt++] = col; 809 } 810 for (k=A->i[i]; k<A->i[i+1]; k++) { 811 column_indices[cnt++] = A->j[k] + cstart; 812 } 813 for (; j<B->i[i+1]; j++) { 814 column_indices[cnt++] = garray[B->j[j]]; 815 } 816 } 817 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 818 819 /* store the column indices to the file */ 820 if (!rank) { 821 MPI_Status status; 822 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 823 for (i=1; i<size; i++) { 824 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 825 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 826 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 827 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 828 } 829 } else { 830 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 831 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 832 } 833 ierr = PetscFree(column_indices);CHKERRQ(ierr); 834 835 /* load up the local column values */ 836 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 837 cnt = 0; 838 for (i=0; i<mat->m; i++) { 839 for (j=B->i[i]; j<B->i[i+1]; j++) { 840 if ( garray[B->j[j]] > cstart) break; 841 column_values[cnt++] = B->a[j]; 842 } 843 for (k=A->i[i]; k<A->i[i+1]; k++) { 844 column_values[cnt++] = A->a[k]; 845 } 846 for (; j<B->i[i+1]; j++) { 847 column_values[cnt++] = B->a[j]; 848 } 849 } 850 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 851 852 /* store the column values to the file */ 853 if (!rank) { 854 MPI_Status status; 855 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 856 for (i=1; i<size; i++) { 857 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 858 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 859 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 860 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 861 } 862 } else { 863 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 864 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 865 } 866 ierr = PetscFree(column_values);CHKERRQ(ierr); 867 PetscFunctionReturn(0); 868 } 869 870 #undef __FUNCT__ 871 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 872 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 873 { 874 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 875 PetscErrorCode ierr; 876 PetscMPIInt rank = aij->rank,size = aij->size; 877 PetscTruth isdraw,iascii,isbinary; 878 PetscViewer sviewer; 879 PetscViewerFormat format; 880 881 PetscFunctionBegin; 882 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 883 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 884 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 885 if (iascii) { 886 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 887 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 888 MatInfo info; 889 PetscTruth inodes; 890 891 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 892 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 893 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 894 if (!inodes) { 895 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 896 rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 897 } else { 898 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 899 rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 900 } 901 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 902 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 903 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 904 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 905 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 906 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 907 PetscFunctionReturn(0); 908 } else if (format == PETSC_VIEWER_ASCII_INFO) { 909 PetscInt inodecount,inodelimit,*inodes; 910 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 911 if (inodes) { 912 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 913 } else { 914 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 915 } 916 PetscFunctionReturn(0); 917 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 918 PetscFunctionReturn(0); 919 } 920 } else if (isbinary) { 921 if (size == 1) { 922 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 923 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 924 } else { 925 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 926 } 927 PetscFunctionReturn(0); 928 } else if (isdraw) { 929 PetscDraw draw; 930 PetscTruth isnull; 931 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 932 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 933 } 934 935 if (size == 1) { 936 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 937 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 938 } else { 939 /* assemble the entire matrix onto first processor. */ 940 Mat A; 941 Mat_SeqAIJ *Aloc; 942 PetscInt M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 943 PetscScalar *a; 944 945 ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr); 946 if (!rank) { 947 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 948 } else { 949 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 950 } 951 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 952 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 953 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 954 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 955 956 /* copy over the A part */ 957 Aloc = (Mat_SeqAIJ*)aij->A->data; 958 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 959 row = aij->rstart; 960 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 961 for (i=0; i<m; i++) { 962 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 963 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 964 } 965 aj = Aloc->j; 966 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 967 968 /* copy over the B part */ 969 Aloc = (Mat_SeqAIJ*)aij->B->data; 970 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 971 row = aij->rstart; 972 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 973 ct = cols; 974 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 975 for (i=0; i<m; i++) { 976 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 977 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 978 } 979 ierr = PetscFree(ct);CHKERRQ(ierr); 980 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 981 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 982 /* 983 Everyone has to call to draw the matrix since the graphics waits are 984 synchronized across all processors that share the PetscDraw object 985 */ 986 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 987 if (!rank) { 988 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 989 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 990 } 991 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 992 ierr = MatDestroy(A);CHKERRQ(ierr); 993 } 994 PetscFunctionReturn(0); 995 } 996 997 #undef __FUNCT__ 998 #define __FUNCT__ "MatView_MPIAIJ" 999 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1000 { 1001 PetscErrorCode ierr; 1002 PetscTruth iascii,isdraw,issocket,isbinary; 1003 1004 PetscFunctionBegin; 1005 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1006 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1007 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1008 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1009 if (iascii || isdraw || isbinary || issocket) { 1010 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1011 } else { 1012 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1013 } 1014 PetscFunctionReturn(0); 1015 } 1016 1017 1018 1019 #undef __FUNCT__ 1020 #define __FUNCT__ "MatRelax_MPIAIJ" 1021 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1022 { 1023 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1024 PetscErrorCode ierr; 1025 Vec bb1; 1026 1027 PetscFunctionBegin; 1028 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1029 1030 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1031 1032 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1033 if (flag & SOR_ZERO_INITIAL_GUESS) { 1034 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1035 its--; 1036 } 1037 1038 while (its--) { 1039 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1040 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1041 1042 /* update rhs: bb1 = bb - B*x */ 1043 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1044 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1045 1046 /* local sweep */ 1047 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1048 CHKERRQ(ierr); 1049 } 1050 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1051 if (flag & SOR_ZERO_INITIAL_GUESS) { 1052 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1053 its--; 1054 } 1055 while (its--) { 1056 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1057 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1058 1059 /* update rhs: bb1 = bb - B*x */ 1060 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1061 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1062 1063 /* local sweep */ 1064 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1065 CHKERRQ(ierr); 1066 } 1067 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1068 if (flag & SOR_ZERO_INITIAL_GUESS) { 1069 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1070 its--; 1071 } 1072 while (its--) { 1073 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1074 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1075 1076 /* update rhs: bb1 = bb - B*x */ 1077 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1078 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1079 1080 /* local sweep */ 1081 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1082 CHKERRQ(ierr); 1083 } 1084 } else { 1085 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1086 } 1087 1088 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1089 PetscFunctionReturn(0); 1090 } 1091 1092 #undef __FUNCT__ 1093 #define __FUNCT__ "MatPermute_MPIAIJ" 1094 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1095 { 1096 MPI_Comm comm,pcomm; 1097 PetscInt first,local_size,nrows,*rows; 1098 int ntids; 1099 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1100 PetscErrorCode ierr; 1101 1102 PetscFunctionBegin; 1103 ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr); 1104 /* make a collective version of 'rowp' */ 1105 ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr); 1106 if (pcomm==comm) { 1107 crowp = rowp; 1108 } else { 1109 ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr); 1110 ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr); 1111 ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr); 1112 ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr); 1113 } 1114 /* collect the global row permutation and invert it */ 1115 ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr); 1116 ierr = ISSetPermutation(growp); CHKERRQ(ierr); 1117 if (pcomm!=comm) { 1118 ierr = ISDestroy(crowp); CHKERRQ(ierr); 1119 } 1120 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1121 /* get the local target indices */ 1122 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr); 1123 ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr); 1124 ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr); 1125 ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr); 1126 ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr); 1127 ierr = ISDestroy(irowp); CHKERRQ(ierr); 1128 /* the column permutation is so much easier; 1129 make a local version of 'colp' and invert it */ 1130 ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr); 1131 ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr); 1132 if (ntids==1) { 1133 lcolp = colp; 1134 } else { 1135 ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr); 1136 ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr); 1137 ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr); 1138 } 1139 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr); 1140 ierr = ISSetPermutation(lcolp); CHKERRQ(ierr); 1141 if (ntids>1) { 1142 ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr); 1143 ierr = ISDestroy(lcolp); CHKERRQ(ierr); 1144 } 1145 /* now we just get the submatrix */ 1146 ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr); 1147 /* clean up */ 1148 ierr = ISDestroy(lrowp); CHKERRQ(ierr); 1149 ierr = ISDestroy(icolp); CHKERRQ(ierr); 1150 PetscFunctionReturn(0); 1151 } 1152 1153 #undef __FUNCT__ 1154 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1155 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1156 { 1157 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1158 Mat A = mat->A,B = mat->B; 1159 PetscErrorCode ierr; 1160 PetscReal isend[5],irecv[5]; 1161 1162 PetscFunctionBegin; 1163 info->block_size = 1.0; 1164 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1165 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1166 isend[3] = info->memory; isend[4] = info->mallocs; 1167 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1168 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1169 isend[3] += info->memory; isend[4] += info->mallocs; 1170 if (flag == MAT_LOCAL) { 1171 info->nz_used = isend[0]; 1172 info->nz_allocated = isend[1]; 1173 info->nz_unneeded = isend[2]; 1174 info->memory = isend[3]; 1175 info->mallocs = isend[4]; 1176 } else if (flag == MAT_GLOBAL_MAX) { 1177 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1178 info->nz_used = irecv[0]; 1179 info->nz_allocated = irecv[1]; 1180 info->nz_unneeded = irecv[2]; 1181 info->memory = irecv[3]; 1182 info->mallocs = irecv[4]; 1183 } else if (flag == MAT_GLOBAL_SUM) { 1184 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1185 info->nz_used = irecv[0]; 1186 info->nz_allocated = irecv[1]; 1187 info->nz_unneeded = irecv[2]; 1188 info->memory = irecv[3]; 1189 info->mallocs = irecv[4]; 1190 } 1191 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1192 info->fill_ratio_needed = 0; 1193 info->factor_mallocs = 0; 1194 info->rows_global = (double)matin->M; 1195 info->columns_global = (double)matin->N; 1196 info->rows_local = (double)matin->m; 1197 info->columns_local = (double)matin->N; 1198 1199 PetscFunctionReturn(0); 1200 } 1201 1202 #undef __FUNCT__ 1203 #define __FUNCT__ "MatSetOption_MPIAIJ" 1204 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op) 1205 { 1206 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1207 PetscErrorCode ierr; 1208 1209 PetscFunctionBegin; 1210 switch (op) { 1211 case MAT_NO_NEW_NONZERO_LOCATIONS: 1212 case MAT_YES_NEW_NONZERO_LOCATIONS: 1213 case MAT_COLUMNS_UNSORTED: 1214 case MAT_COLUMNS_SORTED: 1215 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1216 case MAT_KEEP_ZEROED_ROWS: 1217 case MAT_NEW_NONZERO_LOCATION_ERR: 1218 case MAT_USE_INODES: 1219 case MAT_DO_NOT_USE_INODES: 1220 case MAT_IGNORE_ZERO_ENTRIES: 1221 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1222 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1223 break; 1224 case MAT_ROW_ORIENTED: 1225 a->roworiented = PETSC_TRUE; 1226 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1227 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1228 break; 1229 case MAT_ROWS_SORTED: 1230 case MAT_ROWS_UNSORTED: 1231 case MAT_YES_NEW_DIAGONALS: 1232 ierr = PetscInfo(A,"Option ignored\n");CHKERRQ(ierr); 1233 break; 1234 case MAT_COLUMN_ORIENTED: 1235 a->roworiented = PETSC_FALSE; 1236 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1237 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1238 break; 1239 case MAT_IGNORE_OFF_PROC_ENTRIES: 1240 a->donotstash = PETSC_TRUE; 1241 break; 1242 case MAT_NO_NEW_DIAGONALS: 1243 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1244 case MAT_SYMMETRIC: 1245 case MAT_STRUCTURALLY_SYMMETRIC: 1246 case MAT_HERMITIAN: 1247 case MAT_SYMMETRY_ETERNAL: 1248 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1249 break; 1250 case MAT_NOT_SYMMETRIC: 1251 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1252 case MAT_NOT_HERMITIAN: 1253 case MAT_NOT_SYMMETRY_ETERNAL: 1254 break; 1255 default: 1256 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1257 } 1258 PetscFunctionReturn(0); 1259 } 1260 1261 #undef __FUNCT__ 1262 #define __FUNCT__ "MatGetRow_MPIAIJ" 1263 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1264 { 1265 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1266 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1267 PetscErrorCode ierr; 1268 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1269 PetscInt nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1270 PetscInt *cmap,*idx_p; 1271 1272 PetscFunctionBegin; 1273 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1274 mat->getrowactive = PETSC_TRUE; 1275 1276 if (!mat->rowvalues && (idx || v)) { 1277 /* 1278 allocate enough space to hold information from the longest row. 1279 */ 1280 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1281 PetscInt max = 1,tmp; 1282 for (i=0; i<matin->m; i++) { 1283 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1284 if (max < tmp) { max = tmp; } 1285 } 1286 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1287 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1288 } 1289 1290 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1291 lrow = row - rstart; 1292 1293 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1294 if (!v) {pvA = 0; pvB = 0;} 1295 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1296 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1297 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1298 nztot = nzA + nzB; 1299 1300 cmap = mat->garray; 1301 if (v || idx) { 1302 if (nztot) { 1303 /* Sort by increasing column numbers, assuming A and B already sorted */ 1304 PetscInt imark = -1; 1305 if (v) { 1306 *v = v_p = mat->rowvalues; 1307 for (i=0; i<nzB; i++) { 1308 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1309 else break; 1310 } 1311 imark = i; 1312 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1313 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1314 } 1315 if (idx) { 1316 *idx = idx_p = mat->rowindices; 1317 if (imark > -1) { 1318 for (i=0; i<imark; i++) { 1319 idx_p[i] = cmap[cworkB[i]]; 1320 } 1321 } else { 1322 for (i=0; i<nzB; i++) { 1323 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1324 else break; 1325 } 1326 imark = i; 1327 } 1328 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1329 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1330 } 1331 } else { 1332 if (idx) *idx = 0; 1333 if (v) *v = 0; 1334 } 1335 } 1336 *nz = nztot; 1337 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1338 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1339 PetscFunctionReturn(0); 1340 } 1341 1342 #undef __FUNCT__ 1343 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1344 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1345 { 1346 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1347 1348 PetscFunctionBegin; 1349 if (!aij->getrowactive) { 1350 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1351 } 1352 aij->getrowactive = PETSC_FALSE; 1353 PetscFunctionReturn(0); 1354 } 1355 1356 #undef __FUNCT__ 1357 #define __FUNCT__ "MatNorm_MPIAIJ" 1358 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1359 { 1360 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1361 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1362 PetscErrorCode ierr; 1363 PetscInt i,j,cstart = aij->cstart; 1364 PetscReal sum = 0.0; 1365 PetscScalar *v; 1366 1367 PetscFunctionBegin; 1368 if (aij->size == 1) { 1369 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1370 } else { 1371 if (type == NORM_FROBENIUS) { 1372 v = amat->a; 1373 for (i=0; i<amat->nz; i++) { 1374 #if defined(PETSC_USE_COMPLEX) 1375 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1376 #else 1377 sum += (*v)*(*v); v++; 1378 #endif 1379 } 1380 v = bmat->a; 1381 for (i=0; i<bmat->nz; i++) { 1382 #if defined(PETSC_USE_COMPLEX) 1383 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1384 #else 1385 sum += (*v)*(*v); v++; 1386 #endif 1387 } 1388 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1389 *norm = sqrt(*norm); 1390 } else if (type == NORM_1) { /* max column norm */ 1391 PetscReal *tmp,*tmp2; 1392 PetscInt *jj,*garray = aij->garray; 1393 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1394 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1395 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1396 *norm = 0.0; 1397 v = amat->a; jj = amat->j; 1398 for (j=0; j<amat->nz; j++) { 1399 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1400 } 1401 v = bmat->a; jj = bmat->j; 1402 for (j=0; j<bmat->nz; j++) { 1403 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1404 } 1405 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1406 for (j=0; j<mat->N; j++) { 1407 if (tmp2[j] > *norm) *norm = tmp2[j]; 1408 } 1409 ierr = PetscFree(tmp);CHKERRQ(ierr); 1410 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1411 } else if (type == NORM_INFINITY) { /* max row norm */ 1412 PetscReal ntemp = 0.0; 1413 for (j=0; j<aij->A->m; j++) { 1414 v = amat->a + amat->i[j]; 1415 sum = 0.0; 1416 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1417 sum += PetscAbsScalar(*v); v++; 1418 } 1419 v = bmat->a + bmat->i[j]; 1420 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1421 sum += PetscAbsScalar(*v); v++; 1422 } 1423 if (sum > ntemp) ntemp = sum; 1424 } 1425 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1426 } else { 1427 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1428 } 1429 } 1430 PetscFunctionReturn(0); 1431 } 1432 1433 #undef __FUNCT__ 1434 #define __FUNCT__ "MatTranspose_MPIAIJ" 1435 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1436 { 1437 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1438 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1439 PetscErrorCode ierr; 1440 PetscInt M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1441 Mat B; 1442 PetscScalar *array; 1443 1444 PetscFunctionBegin; 1445 if (!matout && M != N) { 1446 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1447 } 1448 1449 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 1450 ierr = MatSetSizes(B,A->n,A->m,N,M);CHKERRQ(ierr); 1451 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1452 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1453 1454 /* copy over the A part */ 1455 Aloc = (Mat_SeqAIJ*)a->A->data; 1456 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1457 row = a->rstart; 1458 for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;} 1459 for (i=0; i<m; i++) { 1460 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1461 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1462 } 1463 aj = Aloc->j; 1464 for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;} 1465 1466 /* copy over the B part */ 1467 Aloc = (Mat_SeqAIJ*)a->B->data; 1468 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1469 row = a->rstart; 1470 ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1471 ct = cols; 1472 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1473 for (i=0; i<m; i++) { 1474 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1475 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1476 } 1477 ierr = PetscFree(ct);CHKERRQ(ierr); 1478 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1479 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1480 if (matout) { 1481 *matout = B; 1482 } else { 1483 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1484 } 1485 PetscFunctionReturn(0); 1486 } 1487 1488 #undef __FUNCT__ 1489 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1490 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1491 { 1492 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1493 Mat a = aij->A,b = aij->B; 1494 PetscErrorCode ierr; 1495 PetscInt s1,s2,s3; 1496 1497 PetscFunctionBegin; 1498 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1499 if (rr) { 1500 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1501 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1502 /* Overlap communication with computation. */ 1503 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1504 } 1505 if (ll) { 1506 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1507 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1508 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1509 } 1510 /* scale the diagonal block */ 1511 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1512 1513 if (rr) { 1514 /* Do a scatter end and then right scale the off-diagonal block */ 1515 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1516 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1517 } 1518 1519 PetscFunctionReturn(0); 1520 } 1521 1522 1523 #undef __FUNCT__ 1524 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1525 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A) 1526 { 1527 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1528 PetscErrorCode ierr; 1529 1530 PetscFunctionBegin; 1531 if (!a->rank) { 1532 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1533 } 1534 PetscFunctionReturn(0); 1535 } 1536 1537 #undef __FUNCT__ 1538 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1539 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1540 { 1541 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1542 PetscErrorCode ierr; 1543 1544 PetscFunctionBegin; 1545 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1546 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1547 PetscFunctionReturn(0); 1548 } 1549 #undef __FUNCT__ 1550 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1551 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1552 { 1553 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1554 PetscErrorCode ierr; 1555 1556 PetscFunctionBegin; 1557 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1558 PetscFunctionReturn(0); 1559 } 1560 1561 #undef __FUNCT__ 1562 #define __FUNCT__ "MatEqual_MPIAIJ" 1563 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1564 { 1565 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1566 Mat a,b,c,d; 1567 PetscTruth flg; 1568 PetscErrorCode ierr; 1569 1570 PetscFunctionBegin; 1571 a = matA->A; b = matA->B; 1572 c = matB->A; d = matB->B; 1573 1574 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1575 if (flg) { 1576 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1577 } 1578 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1579 PetscFunctionReturn(0); 1580 } 1581 1582 #undef __FUNCT__ 1583 #define __FUNCT__ "MatCopy_MPIAIJ" 1584 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1585 { 1586 PetscErrorCode ierr; 1587 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1588 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1589 1590 PetscFunctionBegin; 1591 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1592 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1593 /* because of the column compression in the off-processor part of the matrix a->B, 1594 the number of columns in a->B and b->B may be different, hence we cannot call 1595 the MatCopy() directly on the two parts. If need be, we can provide a more 1596 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1597 then copying the submatrices */ 1598 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1599 } else { 1600 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1601 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1602 } 1603 PetscFunctionReturn(0); 1604 } 1605 1606 #undef __FUNCT__ 1607 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1608 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1609 { 1610 PetscErrorCode ierr; 1611 1612 PetscFunctionBegin; 1613 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1614 PetscFunctionReturn(0); 1615 } 1616 1617 #include "petscblaslapack.h" 1618 #undef __FUNCT__ 1619 #define __FUNCT__ "MatAXPY_MPIAIJ" 1620 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1621 { 1622 PetscErrorCode ierr; 1623 PetscInt i; 1624 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1625 PetscBLASInt bnz,one=1; 1626 Mat_SeqAIJ *x,*y; 1627 1628 PetscFunctionBegin; 1629 if (str == SAME_NONZERO_PATTERN) { 1630 PetscScalar alpha = a; 1631 x = (Mat_SeqAIJ *)xx->A->data; 1632 y = (Mat_SeqAIJ *)yy->A->data; 1633 bnz = (PetscBLASInt)x->nz; 1634 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1635 x = (Mat_SeqAIJ *)xx->B->data; 1636 y = (Mat_SeqAIJ *)yy->B->data; 1637 bnz = (PetscBLASInt)x->nz; 1638 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1639 } else if (str == SUBSET_NONZERO_PATTERN) { 1640 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1641 1642 x = (Mat_SeqAIJ *)xx->B->data; 1643 y = (Mat_SeqAIJ *)yy->B->data; 1644 if (y->xtoy && y->XtoY != xx->B) { 1645 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1646 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1647 } 1648 if (!y->xtoy) { /* get xtoy */ 1649 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1650 y->XtoY = xx->B; 1651 } 1652 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1653 } else { 1654 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1655 } 1656 PetscFunctionReturn(0); 1657 } 1658 1659 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1660 1661 #undef __FUNCT__ 1662 #define __FUNCT__ "MatConjugate_MPIAIJ" 1663 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1664 { 1665 #if defined(PETSC_USE_COMPLEX) 1666 PetscErrorCode ierr; 1667 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1668 1669 PetscFunctionBegin; 1670 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1671 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1672 #else 1673 PetscFunctionBegin; 1674 #endif 1675 PetscFunctionReturn(0); 1676 } 1677 1678 #undef __FUNCT__ 1679 #define __FUNCT__ "MatRealPart_MPIAIJ" 1680 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 1681 { 1682 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1683 PetscErrorCode ierr; 1684 1685 PetscFunctionBegin; 1686 ierr = MatRealPart(a->A);CHKERRQ(ierr); 1687 ierr = MatRealPart(a->B);CHKERRQ(ierr); 1688 PetscFunctionReturn(0); 1689 } 1690 1691 #undef __FUNCT__ 1692 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 1693 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 1694 { 1695 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1696 PetscErrorCode ierr; 1697 1698 PetscFunctionBegin; 1699 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 1700 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 1701 PetscFunctionReturn(0); 1702 } 1703 1704 /* -------------------------------------------------------------------*/ 1705 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1706 MatGetRow_MPIAIJ, 1707 MatRestoreRow_MPIAIJ, 1708 MatMult_MPIAIJ, 1709 /* 4*/ MatMultAdd_MPIAIJ, 1710 MatMultTranspose_MPIAIJ, 1711 MatMultTransposeAdd_MPIAIJ, 1712 0, 1713 0, 1714 0, 1715 /*10*/ 0, 1716 0, 1717 0, 1718 MatRelax_MPIAIJ, 1719 MatTranspose_MPIAIJ, 1720 /*15*/ MatGetInfo_MPIAIJ, 1721 MatEqual_MPIAIJ, 1722 MatGetDiagonal_MPIAIJ, 1723 MatDiagonalScale_MPIAIJ, 1724 MatNorm_MPIAIJ, 1725 /*20*/ MatAssemblyBegin_MPIAIJ, 1726 MatAssemblyEnd_MPIAIJ, 1727 0, 1728 MatSetOption_MPIAIJ, 1729 MatZeroEntries_MPIAIJ, 1730 /*25*/ MatZeroRows_MPIAIJ, 1731 0, 1732 0, 1733 0, 1734 0, 1735 /*30*/ MatSetUpPreallocation_MPIAIJ, 1736 0, 1737 0, 1738 0, 1739 0, 1740 /*35*/ MatDuplicate_MPIAIJ, 1741 0, 1742 0, 1743 0, 1744 0, 1745 /*40*/ MatAXPY_MPIAIJ, 1746 MatGetSubMatrices_MPIAIJ, 1747 MatIncreaseOverlap_MPIAIJ, 1748 MatGetValues_MPIAIJ, 1749 MatCopy_MPIAIJ, 1750 /*45*/ MatPrintHelp_MPIAIJ, 1751 MatScale_MPIAIJ, 1752 0, 1753 0, 1754 0, 1755 /*50*/ MatSetBlockSize_MPIAIJ, 1756 0, 1757 0, 1758 0, 1759 0, 1760 /*55*/ MatFDColoringCreate_MPIAIJ, 1761 0, 1762 MatSetUnfactored_MPIAIJ, 1763 MatPermute_MPIAIJ, 1764 0, 1765 /*60*/ MatGetSubMatrix_MPIAIJ, 1766 MatDestroy_MPIAIJ, 1767 MatView_MPIAIJ, 1768 0, 1769 0, 1770 /*65*/ 0, 1771 0, 1772 0, 1773 0, 1774 0, 1775 /*70*/ 0, 1776 0, 1777 MatSetColoring_MPIAIJ, 1778 #if defined(PETSC_HAVE_ADIC) 1779 MatSetValuesAdic_MPIAIJ, 1780 #else 1781 0, 1782 #endif 1783 MatSetValuesAdifor_MPIAIJ, 1784 /*75*/ 0, 1785 0, 1786 0, 1787 0, 1788 0, 1789 /*80*/ 0, 1790 0, 1791 0, 1792 0, 1793 /*84*/ MatLoad_MPIAIJ, 1794 0, 1795 0, 1796 0, 1797 0, 1798 0, 1799 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1800 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1801 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1802 MatPtAP_Basic, 1803 MatPtAPSymbolic_MPIAIJ, 1804 /*95*/ MatPtAPNumeric_MPIAIJ, 1805 0, 1806 0, 1807 0, 1808 0, 1809 /*100*/0, 1810 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1811 MatPtAPNumeric_MPIAIJ_MPIAIJ, 1812 MatConjugate_MPIAIJ, 1813 0, 1814 /*105*/MatSetValuesRow_MPIAIJ, 1815 MatRealPart_MPIAIJ, 1816 MatImaginaryPart_MPIAIJ}; 1817 1818 /* ----------------------------------------------------------------------------------------*/ 1819 1820 EXTERN_C_BEGIN 1821 #undef __FUNCT__ 1822 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1823 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 1824 { 1825 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1826 PetscErrorCode ierr; 1827 1828 PetscFunctionBegin; 1829 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1830 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1831 PetscFunctionReturn(0); 1832 } 1833 EXTERN_C_END 1834 1835 EXTERN_C_BEGIN 1836 #undef __FUNCT__ 1837 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1838 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 1839 { 1840 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1841 PetscErrorCode ierr; 1842 1843 PetscFunctionBegin; 1844 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1845 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1846 PetscFunctionReturn(0); 1847 } 1848 EXTERN_C_END 1849 1850 #include "petscpc.h" 1851 EXTERN_C_BEGIN 1852 #undef __FUNCT__ 1853 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1854 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1855 { 1856 Mat_MPIAIJ *b; 1857 PetscErrorCode ierr; 1858 PetscInt i; 1859 1860 PetscFunctionBegin; 1861 B->preallocated = PETSC_TRUE; 1862 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1863 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1864 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1865 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1866 if (d_nnz) { 1867 for (i=0; i<B->m; i++) { 1868 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1869 } 1870 } 1871 if (o_nnz) { 1872 for (i=0; i<B->m; i++) { 1873 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1874 } 1875 } 1876 b = (Mat_MPIAIJ*)B->data; 1877 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1878 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1879 1880 PetscFunctionReturn(0); 1881 } 1882 EXTERN_C_END 1883 1884 #undef __FUNCT__ 1885 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1886 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1887 { 1888 Mat mat; 1889 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1890 PetscErrorCode ierr; 1891 1892 PetscFunctionBegin; 1893 *newmat = 0; 1894 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 1895 ierr = MatSetSizes(mat,matin->m,matin->n,matin->M,matin->N);CHKERRQ(ierr); 1896 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1897 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1898 a = (Mat_MPIAIJ*)mat->data; 1899 1900 mat->factor = matin->factor; 1901 mat->bs = matin->bs; 1902 mat->assembled = PETSC_TRUE; 1903 mat->insertmode = NOT_SET_VALUES; 1904 mat->preallocated = PETSC_TRUE; 1905 1906 a->rstart = oldmat->rstart; 1907 a->rend = oldmat->rend; 1908 a->cstart = oldmat->cstart; 1909 a->cend = oldmat->cend; 1910 a->size = oldmat->size; 1911 a->rank = oldmat->rank; 1912 a->donotstash = oldmat->donotstash; 1913 a->roworiented = oldmat->roworiented; 1914 a->rowindices = 0; 1915 a->rowvalues = 0; 1916 a->getrowactive = PETSC_FALSE; 1917 1918 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 1919 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1920 if (oldmat->colmap) { 1921 #if defined (PETSC_USE_CTABLE) 1922 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1923 #else 1924 ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1925 ierr = PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1926 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1927 #endif 1928 } else a->colmap = 0; 1929 if (oldmat->garray) { 1930 PetscInt len; 1931 len = oldmat->B->n; 1932 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1933 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 1934 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 1935 } else a->garray = 0; 1936 1937 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1938 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 1939 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1940 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 1941 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1942 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1943 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1944 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1945 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1946 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 1947 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1948 *newmat = mat; 1949 PetscFunctionReturn(0); 1950 } 1951 1952 #include "petscsys.h" 1953 1954 #undef __FUNCT__ 1955 #define __FUNCT__ "MatLoad_MPIAIJ" 1956 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat) 1957 { 1958 Mat A; 1959 PetscScalar *vals,*svals; 1960 MPI_Comm comm = ((PetscObject)viewer)->comm; 1961 MPI_Status status; 1962 PetscErrorCode ierr; 1963 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 1964 PetscInt i,nz,j,rstart,rend,mmax; 1965 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 1966 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1967 PetscInt cend,cstart,n,*rowners; 1968 int fd; 1969 1970 PetscFunctionBegin; 1971 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1972 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1973 if (!rank) { 1974 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1975 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1976 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1977 } 1978 1979 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1980 M = header[1]; N = header[2]; 1981 /* determine ownership of all rows */ 1982 m = M/size + ((M % size) > rank); 1983 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1984 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1985 1986 /* First process needs enough room for process with most rows */ 1987 if (!rank) { 1988 mmax = rowners[1]; 1989 for (i=2; i<size; i++) { 1990 mmax = PetscMax(mmax,rowners[i]); 1991 } 1992 } else mmax = m; 1993 1994 rowners[0] = 0; 1995 for (i=2; i<=size; i++) { 1996 mmax = PetscMax(mmax,rowners[i]); 1997 rowners[i] += rowners[i-1]; 1998 } 1999 rstart = rowners[rank]; 2000 rend = rowners[rank+1]; 2001 2002 /* distribute row lengths to all processors */ 2003 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 2004 if (!rank) { 2005 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2006 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2007 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2008 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2009 for (j=0; j<m; j++) { 2010 procsnz[0] += ourlens[j]; 2011 } 2012 for (i=1; i<size; i++) { 2013 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2014 /* calculate the number of nonzeros on each processor */ 2015 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2016 procsnz[i] += rowlengths[j]; 2017 } 2018 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2019 } 2020 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2021 } else { 2022 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2023 } 2024 2025 if (!rank) { 2026 /* determine max buffer needed and allocate it */ 2027 maxnz = 0; 2028 for (i=0; i<size; i++) { 2029 maxnz = PetscMax(maxnz,procsnz[i]); 2030 } 2031 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2032 2033 /* read in my part of the matrix column indices */ 2034 nz = procsnz[0]; 2035 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2036 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2037 2038 /* read in every one elses and ship off */ 2039 for (i=1; i<size; i++) { 2040 nz = procsnz[i]; 2041 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2042 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2043 } 2044 ierr = PetscFree(cols);CHKERRQ(ierr); 2045 } else { 2046 /* determine buffer space needed for message */ 2047 nz = 0; 2048 for (i=0; i<m; i++) { 2049 nz += ourlens[i]; 2050 } 2051 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2052 2053 /* receive message of column indices*/ 2054 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2055 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2056 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2057 } 2058 2059 /* determine column ownership if matrix is not square */ 2060 if (N != M) { 2061 n = N/size + ((N % size) > rank); 2062 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2063 cstart = cend - n; 2064 } else { 2065 cstart = rstart; 2066 cend = rend; 2067 n = cend - cstart; 2068 } 2069 2070 /* loop over local rows, determining number of off diagonal entries */ 2071 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2072 jj = 0; 2073 for (i=0; i<m; i++) { 2074 for (j=0; j<ourlens[i]; j++) { 2075 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2076 jj++; 2077 } 2078 } 2079 2080 /* create our matrix */ 2081 for (i=0; i<m; i++) { 2082 ourlens[i] -= offlens[i]; 2083 } 2084 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2085 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2086 ierr = MatSetType(A,type);CHKERRQ(ierr); 2087 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2088 2089 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2090 for (i=0; i<m; i++) { 2091 ourlens[i] += offlens[i]; 2092 } 2093 2094 if (!rank) { 2095 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2096 2097 /* read in my part of the matrix numerical values */ 2098 nz = procsnz[0]; 2099 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2100 2101 /* insert into matrix */ 2102 jj = rstart; 2103 smycols = mycols; 2104 svals = vals; 2105 for (i=0; i<m; i++) { 2106 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2107 smycols += ourlens[i]; 2108 svals += ourlens[i]; 2109 jj++; 2110 } 2111 2112 /* read in other processors and ship out */ 2113 for (i=1; i<size; i++) { 2114 nz = procsnz[i]; 2115 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2116 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2117 } 2118 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2119 } else { 2120 /* receive numeric values */ 2121 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2122 2123 /* receive message of values*/ 2124 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2125 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2126 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2127 2128 /* insert into matrix */ 2129 jj = rstart; 2130 smycols = mycols; 2131 svals = vals; 2132 for (i=0; i<m; i++) { 2133 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2134 smycols += ourlens[i]; 2135 svals += ourlens[i]; 2136 jj++; 2137 } 2138 } 2139 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2140 ierr = PetscFree(vals);CHKERRQ(ierr); 2141 ierr = PetscFree(mycols);CHKERRQ(ierr); 2142 ierr = PetscFree(rowners);CHKERRQ(ierr); 2143 2144 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2145 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2146 *newmat = A; 2147 PetscFunctionReturn(0); 2148 } 2149 2150 #undef __FUNCT__ 2151 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2152 /* 2153 Not great since it makes two copies of the submatrix, first an SeqAIJ 2154 in local and then by concatenating the local matrices the end result. 2155 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2156 */ 2157 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2158 { 2159 PetscErrorCode ierr; 2160 PetscMPIInt rank,size; 2161 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2162 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2163 Mat *local,M,Mreuse; 2164 PetscScalar *vwork,*aa; 2165 MPI_Comm comm = mat->comm; 2166 Mat_SeqAIJ *aij; 2167 2168 2169 PetscFunctionBegin; 2170 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2171 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2172 2173 if (call == MAT_REUSE_MATRIX) { 2174 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2175 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2176 local = &Mreuse; 2177 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2178 } else { 2179 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2180 Mreuse = *local; 2181 ierr = PetscFree(local);CHKERRQ(ierr); 2182 } 2183 2184 /* 2185 m - number of local rows 2186 n - number of columns (same on all processors) 2187 rstart - first row in new global matrix generated 2188 */ 2189 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2190 if (call == MAT_INITIAL_MATRIX) { 2191 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2192 ii = aij->i; 2193 jj = aij->j; 2194 2195 /* 2196 Determine the number of non-zeros in the diagonal and off-diagonal 2197 portions of the matrix in order to do correct preallocation 2198 */ 2199 2200 /* first get start and end of "diagonal" columns */ 2201 if (csize == PETSC_DECIDE) { 2202 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2203 if (mglobal == n) { /* square matrix */ 2204 nlocal = m; 2205 } else { 2206 nlocal = n/size + ((n % size) > rank); 2207 } 2208 } else { 2209 nlocal = csize; 2210 } 2211 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2212 rstart = rend - nlocal; 2213 if (rank == size - 1 && rend != n) { 2214 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2215 } 2216 2217 /* next, compute all the lengths */ 2218 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2219 olens = dlens + m; 2220 for (i=0; i<m; i++) { 2221 jend = ii[i+1] - ii[i]; 2222 olen = 0; 2223 dlen = 0; 2224 for (j=0; j<jend; j++) { 2225 if (*jj < rstart || *jj >= rend) olen++; 2226 else dlen++; 2227 jj++; 2228 } 2229 olens[i] = olen; 2230 dlens[i] = dlen; 2231 } 2232 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2233 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 2234 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2235 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2236 ierr = PetscFree(dlens);CHKERRQ(ierr); 2237 } else { 2238 PetscInt ml,nl; 2239 2240 M = *newmat; 2241 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2242 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2243 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2244 /* 2245 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2246 rather than the slower MatSetValues(). 2247 */ 2248 M->was_assembled = PETSC_TRUE; 2249 M->assembled = PETSC_FALSE; 2250 } 2251 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2252 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2253 ii = aij->i; 2254 jj = aij->j; 2255 aa = aij->a; 2256 for (i=0; i<m; i++) { 2257 row = rstart + i; 2258 nz = ii[i+1] - ii[i]; 2259 cwork = jj; jj += nz; 2260 vwork = aa; aa += nz; 2261 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2262 } 2263 2264 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2265 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2266 *newmat = M; 2267 2268 /* save submatrix used in processor for next request */ 2269 if (call == MAT_INITIAL_MATRIX) { 2270 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2271 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2272 } 2273 2274 PetscFunctionReturn(0); 2275 } 2276 2277 EXTERN_C_BEGIN 2278 #undef __FUNCT__ 2279 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2280 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2281 { 2282 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2283 PetscInt m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d; 2284 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii; 2285 const PetscInt *JJ; 2286 PetscScalar *values; 2287 PetscErrorCode ierr; 2288 2289 PetscFunctionBegin; 2290 if (I[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"I[0] must be 0 it is %D",I[0]); 2291 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2292 o_nnz = d_nnz + m; 2293 2294 for (i=0; i<m; i++) { 2295 nnz = I[i+1]- I[i]; 2296 JJ = J + I[i]; 2297 nnz_max = PetscMax(nnz_max,nnz); 2298 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 2299 for (j=0; j<nnz; j++) { 2300 if (*JJ >= cstart) break; 2301 JJ++; 2302 } 2303 d = 0; 2304 for (; j<nnz; j++) { 2305 if (*JJ++ >= cend) break; 2306 d++; 2307 } 2308 d_nnz[i] = d; 2309 o_nnz[i] = nnz - d; 2310 } 2311 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2312 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2313 2314 if (v) values = (PetscScalar*)v; 2315 else { 2316 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2317 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2318 } 2319 2320 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2321 for (i=0; i<m; i++) { 2322 ii = i + rstart; 2323 nnz = I[i+1]- I[i]; 2324 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values+(v ? I[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2325 } 2326 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2327 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2328 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2329 2330 if (!v) { 2331 ierr = PetscFree(values);CHKERRQ(ierr); 2332 } 2333 PetscFunctionReturn(0); 2334 } 2335 EXTERN_C_END 2336 2337 #undef __FUNCT__ 2338 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2339 /*@ 2340 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2341 (the default parallel PETSc format). 2342 2343 Collective on MPI_Comm 2344 2345 Input Parameters: 2346 + B - the matrix 2347 . i - the indices into j for the start of each local row (starts with zero) 2348 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2349 - v - optional values in the matrix 2350 2351 Level: developer 2352 2353 .keywords: matrix, aij, compressed row, sparse, parallel 2354 2355 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2356 @*/ 2357 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2358 { 2359 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2360 2361 PetscFunctionBegin; 2362 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2363 if (f) { 2364 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2365 } 2366 PetscFunctionReturn(0); 2367 } 2368 2369 #undef __FUNCT__ 2370 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2371 /*@C 2372 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2373 (the default parallel PETSc format). For good matrix assembly performance 2374 the user should preallocate the matrix storage by setting the parameters 2375 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2376 performance can be increased by more than a factor of 50. 2377 2378 Collective on MPI_Comm 2379 2380 Input Parameters: 2381 + A - the matrix 2382 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2383 (same value is used for all local rows) 2384 . d_nnz - array containing the number of nonzeros in the various rows of the 2385 DIAGONAL portion of the local submatrix (possibly different for each row) 2386 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2387 The size of this array is equal to the number of local rows, i.e 'm'. 2388 You must leave room for the diagonal entry even if it is zero. 2389 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2390 submatrix (same value is used for all local rows). 2391 - o_nnz - array containing the number of nonzeros in the various rows of the 2392 OFF-DIAGONAL portion of the local submatrix (possibly different for 2393 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2394 structure. The size of this array is equal to the number 2395 of local rows, i.e 'm'. 2396 2397 If the *_nnz parameter is given then the *_nz parameter is ignored 2398 2399 The AIJ format (also called the Yale sparse matrix format or 2400 compressed row storage (CSR)), is fully compatible with standard Fortran 77 2401 storage. The stored row and column indices begin with zero. See the users manual for details. 2402 2403 The parallel matrix is partitioned such that the first m0 rows belong to 2404 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2405 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2406 2407 The DIAGONAL portion of the local submatrix of a processor can be defined 2408 as the submatrix which is obtained by extraction the part corresponding 2409 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2410 first row that belongs to the processor, and r2 is the last row belonging 2411 to the this processor. This is a square mxm matrix. The remaining portion 2412 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2413 2414 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2415 2416 Example usage: 2417 2418 Consider the following 8x8 matrix with 34 non-zero values, that is 2419 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2420 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2421 as follows: 2422 2423 .vb 2424 1 2 0 | 0 3 0 | 0 4 2425 Proc0 0 5 6 | 7 0 0 | 8 0 2426 9 0 10 | 11 0 0 | 12 0 2427 ------------------------------------- 2428 13 0 14 | 15 16 17 | 0 0 2429 Proc1 0 18 0 | 19 20 21 | 0 0 2430 0 0 0 | 22 23 0 | 24 0 2431 ------------------------------------- 2432 Proc2 25 26 27 | 0 0 28 | 29 0 2433 30 0 0 | 31 32 33 | 0 34 2434 .ve 2435 2436 This can be represented as a collection of submatrices as: 2437 2438 .vb 2439 A B C 2440 D E F 2441 G H I 2442 .ve 2443 2444 Where the submatrices A,B,C are owned by proc0, D,E,F are 2445 owned by proc1, G,H,I are owned by proc2. 2446 2447 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2448 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2449 The 'M','N' parameters are 8,8, and have the same values on all procs. 2450 2451 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2452 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2453 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2454 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2455 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2456 matrix, ans [DF] as another SeqAIJ matrix. 2457 2458 When d_nz, o_nz parameters are specified, d_nz storage elements are 2459 allocated for every row of the local diagonal submatrix, and o_nz 2460 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2461 One way to choose d_nz and o_nz is to use the max nonzerors per local 2462 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2463 In this case, the values of d_nz,o_nz are: 2464 .vb 2465 proc0 : dnz = 2, o_nz = 2 2466 proc1 : dnz = 3, o_nz = 2 2467 proc2 : dnz = 1, o_nz = 4 2468 .ve 2469 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2470 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2471 for proc3. i.e we are using 12+15+10=37 storage locations to store 2472 34 values. 2473 2474 When d_nnz, o_nnz parameters are specified, the storage is specified 2475 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2476 In the above case the values for d_nnz,o_nnz are: 2477 .vb 2478 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2479 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2480 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2481 .ve 2482 Here the space allocated is sum of all the above values i.e 34, and 2483 hence pre-allocation is perfect. 2484 2485 Level: intermediate 2486 2487 .keywords: matrix, aij, compressed row, sparse, parallel 2488 2489 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 2490 MPIAIJ 2491 @*/ 2492 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2493 { 2494 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2495 2496 PetscFunctionBegin; 2497 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2498 if (f) { 2499 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2500 } 2501 PetscFunctionReturn(0); 2502 } 2503 2504 #undef __FUNCT__ 2505 #define __FUNCT__ "MatCreateMPIAIJ" 2506 /*@C 2507 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2508 (the default parallel PETSc format). For good matrix assembly performance 2509 the user should preallocate the matrix storage by setting the parameters 2510 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2511 performance can be increased by more than a factor of 50. 2512 2513 Collective on MPI_Comm 2514 2515 Input Parameters: 2516 + comm - MPI communicator 2517 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2518 This value should be the same as the local size used in creating the 2519 y vector for the matrix-vector product y = Ax. 2520 . n - This value should be the same as the local size used in creating the 2521 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2522 calculated if N is given) For square matrices n is almost always m. 2523 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2524 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2525 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2526 (same value is used for all local rows) 2527 . d_nnz - array containing the number of nonzeros in the various rows of the 2528 DIAGONAL portion of the local submatrix (possibly different for each row) 2529 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2530 The size of this array is equal to the number of local rows, i.e 'm'. 2531 You must leave room for the diagonal entry even if it is zero. 2532 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2533 submatrix (same value is used for all local rows). 2534 - o_nnz - array containing the number of nonzeros in the various rows of the 2535 OFF-DIAGONAL portion of the local submatrix (possibly different for 2536 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2537 structure. The size of this array is equal to the number 2538 of local rows, i.e 'm'. 2539 2540 Output Parameter: 2541 . A - the matrix 2542 2543 Notes: 2544 If the *_nnz parameter is given then the *_nz parameter is ignored 2545 2546 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2547 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2548 storage requirements for this matrix. 2549 2550 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2551 processor than it must be used on all processors that share the object for 2552 that argument. 2553 2554 The user MUST specify either the local or global matrix dimensions 2555 (possibly both). 2556 2557 The parallel matrix is partitioned such that the first m0 rows belong to 2558 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2559 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2560 2561 The DIAGONAL portion of the local submatrix of a processor can be defined 2562 as the submatrix which is obtained by extraction the part corresponding 2563 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2564 first row that belongs to the processor, and r2 is the last row belonging 2565 to the this processor. This is a square mxm matrix. The remaining portion 2566 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2567 2568 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2569 2570 When calling this routine with a single process communicator, a matrix of 2571 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2572 type of communicator, use the construction mechanism: 2573 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 2574 2575 By default, this format uses inodes (identical nodes) when possible. 2576 We search for consecutive rows with the same nonzero structure, thereby 2577 reusing matrix information to achieve increased efficiency. 2578 2579 Options Database Keys: 2580 + -mat_no_inode - Do not use inodes 2581 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 2582 - -mat_aij_oneindex - Internally use indexing starting at 1 2583 rather than 0. Note that when calling MatSetValues(), 2584 the user still MUST index entries starting at 0! 2585 2586 2587 Example usage: 2588 2589 Consider the following 8x8 matrix with 34 non-zero values, that is 2590 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2591 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2592 as follows: 2593 2594 .vb 2595 1 2 0 | 0 3 0 | 0 4 2596 Proc0 0 5 6 | 7 0 0 | 8 0 2597 9 0 10 | 11 0 0 | 12 0 2598 ------------------------------------- 2599 13 0 14 | 15 16 17 | 0 0 2600 Proc1 0 18 0 | 19 20 21 | 0 0 2601 0 0 0 | 22 23 0 | 24 0 2602 ------------------------------------- 2603 Proc2 25 26 27 | 0 0 28 | 29 0 2604 30 0 0 | 31 32 33 | 0 34 2605 .ve 2606 2607 This can be represented as a collection of submatrices as: 2608 2609 .vb 2610 A B C 2611 D E F 2612 G H I 2613 .ve 2614 2615 Where the submatrices A,B,C are owned by proc0, D,E,F are 2616 owned by proc1, G,H,I are owned by proc2. 2617 2618 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2619 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2620 The 'M','N' parameters are 8,8, and have the same values on all procs. 2621 2622 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2623 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2624 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2625 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2626 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2627 matrix, ans [DF] as another SeqAIJ matrix. 2628 2629 When d_nz, o_nz parameters are specified, d_nz storage elements are 2630 allocated for every row of the local diagonal submatrix, and o_nz 2631 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2632 One way to choose d_nz and o_nz is to use the max nonzerors per local 2633 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2634 In this case, the values of d_nz,o_nz are: 2635 .vb 2636 proc0 : dnz = 2, o_nz = 2 2637 proc1 : dnz = 3, o_nz = 2 2638 proc2 : dnz = 1, o_nz = 4 2639 .ve 2640 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2641 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2642 for proc3. i.e we are using 12+15+10=37 storage locations to store 2643 34 values. 2644 2645 When d_nnz, o_nnz parameters are specified, the storage is specified 2646 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2647 In the above case the values for d_nnz,o_nnz are: 2648 .vb 2649 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2650 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2651 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2652 .ve 2653 Here the space allocated is sum of all the above values i.e 34, and 2654 hence pre-allocation is perfect. 2655 2656 Level: intermediate 2657 2658 .keywords: matrix, aij, compressed row, sparse, parallel 2659 2660 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2661 MPIAIJ 2662 @*/ 2663 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2664 { 2665 PetscErrorCode ierr; 2666 PetscMPIInt size; 2667 2668 PetscFunctionBegin; 2669 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2670 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2671 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2672 if (size > 1) { 2673 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2674 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2675 } else { 2676 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2677 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2678 } 2679 PetscFunctionReturn(0); 2680 } 2681 2682 #undef __FUNCT__ 2683 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2684 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2685 { 2686 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2687 2688 PetscFunctionBegin; 2689 *Ad = a->A; 2690 *Ao = a->B; 2691 *colmap = a->garray; 2692 PetscFunctionReturn(0); 2693 } 2694 2695 #undef __FUNCT__ 2696 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2697 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2698 { 2699 PetscErrorCode ierr; 2700 PetscInt i; 2701 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2702 2703 PetscFunctionBegin; 2704 if (coloring->ctype == IS_COLORING_LOCAL) { 2705 ISColoringValue *allcolors,*colors; 2706 ISColoring ocoloring; 2707 2708 /* set coloring for diagonal portion */ 2709 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2710 2711 /* set coloring for off-diagonal portion */ 2712 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2713 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2714 for (i=0; i<a->B->n; i++) { 2715 colors[i] = allcolors[a->garray[i]]; 2716 } 2717 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2718 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,coloring->n,colors,&ocoloring);CHKERRQ(ierr); 2719 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2720 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2721 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2722 ISColoringValue *colors; 2723 PetscInt *larray; 2724 ISColoring ocoloring; 2725 2726 /* set coloring for diagonal portion */ 2727 ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2728 for (i=0; i<a->A->n; i++) { 2729 larray[i] = i + a->cstart; 2730 } 2731 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2732 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2733 for (i=0; i<a->A->n; i++) { 2734 colors[i] = coloring->colors[larray[i]]; 2735 } 2736 ierr = PetscFree(larray);CHKERRQ(ierr); 2737 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,coloring->n,colors,&ocoloring);CHKERRQ(ierr); 2738 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2739 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2740 2741 /* set coloring for off-diagonal portion */ 2742 ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2743 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2744 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2745 for (i=0; i<a->B->n; i++) { 2746 colors[i] = coloring->colors[larray[i]]; 2747 } 2748 ierr = PetscFree(larray);CHKERRQ(ierr); 2749 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,coloring->n,colors,&ocoloring);CHKERRQ(ierr); 2750 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2751 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2752 } else { 2753 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 2754 } 2755 2756 PetscFunctionReturn(0); 2757 } 2758 2759 #if defined(PETSC_HAVE_ADIC) 2760 #undef __FUNCT__ 2761 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2762 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2763 { 2764 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2765 PetscErrorCode ierr; 2766 2767 PetscFunctionBegin; 2768 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2769 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2770 PetscFunctionReturn(0); 2771 } 2772 #endif 2773 2774 #undef __FUNCT__ 2775 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2776 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 2777 { 2778 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2779 PetscErrorCode ierr; 2780 2781 PetscFunctionBegin; 2782 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2783 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2784 PetscFunctionReturn(0); 2785 } 2786 2787 #undef __FUNCT__ 2788 #define __FUNCT__ "MatMerge" 2789 /*@C 2790 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2791 matrices from each processor 2792 2793 Collective on MPI_Comm 2794 2795 Input Parameters: 2796 + comm - the communicators the parallel matrix will live on 2797 . inmat - the input sequential matrices 2798 . n - number of local columns (or PETSC_DECIDE) 2799 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2800 2801 Output Parameter: 2802 . outmat - the parallel matrix generated 2803 2804 Level: advanced 2805 2806 Notes: The number of columns of the matrix in EACH processor MUST be the same. 2807 2808 @*/ 2809 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2810 { 2811 PetscErrorCode ierr; 2812 PetscInt m,N,i,rstart,nnz,I,*dnz,*onz; 2813 PetscInt *indx; 2814 PetscScalar *values; 2815 2816 PetscFunctionBegin; 2817 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2818 if (scall == MAT_INITIAL_MATRIX){ 2819 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2820 if (n == PETSC_DECIDE){ 2821 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 2822 } 2823 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2824 rstart -= m; 2825 2826 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2827 for (i=0;i<m;i++) { 2828 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2829 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2830 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2831 } 2832 /* This routine will ONLY return MPIAIJ type matrix */ 2833 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 2834 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2835 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2836 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2837 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2838 2839 } else if (scall == MAT_REUSE_MATRIX){ 2840 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2841 } else { 2842 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 2843 } 2844 2845 for (i=0;i<m;i++) { 2846 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2847 I = i + rstart; 2848 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2849 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2850 } 2851 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2852 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2853 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2854 2855 PetscFunctionReturn(0); 2856 } 2857 2858 #undef __FUNCT__ 2859 #define __FUNCT__ "MatFileSplit" 2860 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2861 { 2862 PetscErrorCode ierr; 2863 PetscMPIInt rank; 2864 PetscInt m,N,i,rstart,nnz; 2865 size_t len; 2866 const PetscInt *indx; 2867 PetscViewer out; 2868 char *name; 2869 Mat B; 2870 const PetscScalar *values; 2871 2872 PetscFunctionBegin; 2873 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2874 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2875 /* Should this be the type of the diagonal block of A? */ 2876 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2877 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 2878 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2879 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2880 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2881 for (i=0;i<m;i++) { 2882 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2883 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2884 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2885 } 2886 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2887 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2888 2889 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2890 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2891 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2892 sprintf(name,"%s.%d",outfile,rank); 2893 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 2894 ierr = PetscFree(name); 2895 ierr = MatView(B,out);CHKERRQ(ierr); 2896 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2897 ierr = MatDestroy(B);CHKERRQ(ierr); 2898 PetscFunctionReturn(0); 2899 } 2900 2901 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 2902 #undef __FUNCT__ 2903 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 2904 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 2905 { 2906 PetscErrorCode ierr; 2907 Mat_Merge_SeqsToMPI *merge; 2908 PetscObjectContainer container; 2909 2910 PetscFunctionBegin; 2911 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2912 if (container) { 2913 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2914 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 2915 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 2916 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 2917 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 2918 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 2919 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 2920 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 2921 if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);} 2922 if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);} 2923 if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);} 2924 2925 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 2926 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 2927 } 2928 ierr = PetscFree(merge);CHKERRQ(ierr); 2929 2930 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 2931 PetscFunctionReturn(0); 2932 } 2933 2934 #include "src/mat/utils/freespace.h" 2935 #include "petscbt.h" 2936 static PetscEvent logkey_seqstompinum = 0; 2937 #undef __FUNCT__ 2938 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 2939 /*@C 2940 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 2941 matrices from each processor 2942 2943 Collective on MPI_Comm 2944 2945 Input Parameters: 2946 + comm - the communicators the parallel matrix will live on 2947 . seqmat - the input sequential matrices 2948 . m - number of local rows (or PETSC_DECIDE) 2949 . n - number of local columns (or PETSC_DECIDE) 2950 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2951 2952 Output Parameter: 2953 . mpimat - the parallel matrix generated 2954 2955 Level: advanced 2956 2957 Notes: 2958 The dimensions of the sequential matrix in each processor MUST be the same. 2959 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 2960 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 2961 @*/ 2962 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 2963 { 2964 PetscErrorCode ierr; 2965 MPI_Comm comm=mpimat->comm; 2966 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2967 PetscMPIInt size,rank,taga,*len_s; 2968 PetscInt N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j; 2969 PetscInt proc,m; 2970 PetscInt **buf_ri,**buf_rj; 2971 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 2972 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 2973 MPI_Request *s_waits,*r_waits; 2974 MPI_Status *status; 2975 MatScalar *aa=a->a,**abuf_r,*ba_i; 2976 Mat_Merge_SeqsToMPI *merge; 2977 PetscObjectContainer container; 2978 2979 PetscFunctionBegin; 2980 if (!logkey_seqstompinum) { 2981 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 2982 } 2983 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 2984 2985 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2986 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2987 2988 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2989 if (container) { 2990 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2991 } 2992 bi = merge->bi; 2993 bj = merge->bj; 2994 buf_ri = merge->buf_ri; 2995 buf_rj = merge->buf_rj; 2996 2997 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 2998 owners = merge->rowmap.range; 2999 len_s = merge->len_s; 3000 3001 /* send and recv matrix values */ 3002 /*-----------------------------*/ 3003 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3004 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3005 3006 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 3007 for (proc=0,k=0; proc<size; proc++){ 3008 if (!len_s[proc]) continue; 3009 i = owners[proc]; 3010 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 3011 k++; 3012 } 3013 3014 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 3015 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 3016 ierr = PetscFree(status);CHKERRQ(ierr); 3017 3018 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3019 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3020 3021 /* insert mat values of mpimat */ 3022 /*----------------------------*/ 3023 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 3024 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3025 nextrow = buf_ri_k + merge->nrecv; 3026 nextai = nextrow + merge->nrecv; 3027 3028 for (k=0; k<merge->nrecv; k++){ 3029 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3030 nrows = *(buf_ri_k[k]); 3031 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 3032 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3033 } 3034 3035 /* set values of ba */ 3036 m = merge->rowmap.n; 3037 for (i=0; i<m; i++) { 3038 arow = owners[rank] + i; 3039 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 3040 bnzi = bi[i+1] - bi[i]; 3041 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 3042 3043 /* add local non-zero vals of this proc's seqmat into ba */ 3044 anzi = ai[arow+1] - ai[arow]; 3045 aj = a->j + ai[arow]; 3046 aa = a->a + ai[arow]; 3047 nextaj = 0; 3048 for (j=0; nextaj<anzi; j++){ 3049 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3050 ba_i[j] += aa[nextaj++]; 3051 } 3052 } 3053 3054 /* add received vals into ba */ 3055 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3056 /* i-th row */ 3057 if (i == *nextrow[k]) { 3058 anzi = *(nextai[k]+1) - *nextai[k]; 3059 aj = buf_rj[k] + *(nextai[k]); 3060 aa = abuf_r[k] + *(nextai[k]); 3061 nextaj = 0; 3062 for (j=0; nextaj<anzi; j++){ 3063 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3064 ba_i[j] += aa[nextaj++]; 3065 } 3066 } 3067 nextrow[k]++; nextai[k]++; 3068 } 3069 } 3070 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3071 } 3072 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3073 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3074 3075 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3076 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3077 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3078 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3079 PetscFunctionReturn(0); 3080 } 3081 3082 static PetscEvent logkey_seqstompisym = 0; 3083 #undef __FUNCT__ 3084 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 3085 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3086 { 3087 PetscErrorCode ierr; 3088 Mat B_mpi; 3089 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3090 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3091 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3092 PetscInt M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j; 3093 PetscInt len,proc,*dnz,*onz; 3094 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3095 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3096 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3097 MPI_Status *status; 3098 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3099 PetscBT lnkbt; 3100 Mat_Merge_SeqsToMPI *merge; 3101 PetscObjectContainer container; 3102 3103 PetscFunctionBegin; 3104 if (!logkey_seqstompisym) { 3105 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3106 } 3107 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3108 3109 /* make sure it is a PETSc comm */ 3110 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 3111 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3112 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3113 3114 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3115 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3116 3117 /* determine row ownership */ 3118 /*---------------------------------------------------------*/ 3119 ierr = PetscMapInitialize(comm,m,M,&merge->rowmap);CHKERRQ(ierr); 3120 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3121 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3122 3123 m = merge->rowmap.n; 3124 M = merge->rowmap.N; 3125 owners = merge->rowmap.range; 3126 3127 /* determine the number of messages to send, their lengths */ 3128 /*---------------------------------------------------------*/ 3129 len_s = merge->len_s; 3130 3131 len = 0; /* length of buf_si[] */ 3132 merge->nsend = 0; 3133 for (proc=0; proc<size; proc++){ 3134 len_si[proc] = 0; 3135 if (proc == rank){ 3136 len_s[proc] = 0; 3137 } else { 3138 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3139 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3140 } 3141 if (len_s[proc]) { 3142 merge->nsend++; 3143 nrows = 0; 3144 for (i=owners[proc]; i<owners[proc+1]; i++){ 3145 if (ai[i+1] > ai[i]) nrows++; 3146 } 3147 len_si[proc] = 2*(nrows+1); 3148 len += len_si[proc]; 3149 } 3150 } 3151 3152 /* determine the number and length of messages to receive for ij-structure */ 3153 /*-------------------------------------------------------------------------*/ 3154 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3155 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3156 3157 /* post the Irecv of j-structure */ 3158 /*-------------------------------*/ 3159 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&tagj);CHKERRQ(ierr); 3160 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3161 3162 /* post the Isend of j-structure */ 3163 /*--------------------------------*/ 3164 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3165 sj_waits = si_waits + merge->nsend; 3166 3167 for (proc=0, k=0; proc<size; proc++){ 3168 if (!len_s[proc]) continue; 3169 i = owners[proc]; 3170 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3171 k++; 3172 } 3173 3174 /* receives and sends of j-structure are complete */ 3175 /*------------------------------------------------*/ 3176 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 3177 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 3178 3179 /* send and recv i-structure */ 3180 /*---------------------------*/ 3181 ierr = PetscObjectGetNewTag((PetscObject)merge,&tagi);CHKERRQ(ierr); 3182 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3183 3184 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3185 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3186 for (proc=0,k=0; proc<size; proc++){ 3187 if (!len_s[proc]) continue; 3188 /* form outgoing message for i-structure: 3189 buf_si[0]: nrows to be sent 3190 [1:nrows]: row index (global) 3191 [nrows+1:2*nrows+1]: i-structure index 3192 */ 3193 /*-------------------------------------------*/ 3194 nrows = len_si[proc]/2 - 1; 3195 buf_si_i = buf_si + nrows+1; 3196 buf_si[0] = nrows; 3197 buf_si_i[0] = 0; 3198 nrows = 0; 3199 for (i=owners[proc]; i<owners[proc+1]; i++){ 3200 anzi = ai[i+1] - ai[i]; 3201 if (anzi) { 3202 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3203 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3204 nrows++; 3205 } 3206 } 3207 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3208 k++; 3209 buf_si += len_si[proc]; 3210 } 3211 3212 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 3213 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 3214 3215 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 3216 for (i=0; i<merge->nrecv; i++){ 3217 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 3218 } 3219 3220 ierr = PetscFree(len_si);CHKERRQ(ierr); 3221 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3222 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 3223 ierr = PetscFree(si_waits);CHKERRQ(ierr); 3224 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3225 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3226 ierr = PetscFree(status);CHKERRQ(ierr); 3227 3228 /* compute a local seq matrix in each processor */ 3229 /*----------------------------------------------*/ 3230 /* allocate bi array and free space for accumulating nonzero column info */ 3231 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3232 bi[0] = 0; 3233 3234 /* create and initialize a linked list */ 3235 nlnk = N+1; 3236 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3237 3238 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3239 len = 0; 3240 len = ai[owners[rank+1]] - ai[owners[rank]]; 3241 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3242 current_space = free_space; 3243 3244 /* determine symbolic info for each local row */ 3245 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3246 nextrow = buf_ri_k + merge->nrecv; 3247 nextai = nextrow + merge->nrecv; 3248 for (k=0; k<merge->nrecv; k++){ 3249 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3250 nrows = *buf_ri_k[k]; 3251 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 3252 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3253 } 3254 3255 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3256 len = 0; 3257 for (i=0;i<m;i++) { 3258 bnzi = 0; 3259 /* add local non-zero cols of this proc's seqmat into lnk */ 3260 arow = owners[rank] + i; 3261 anzi = ai[arow+1] - ai[arow]; 3262 aj = a->j + ai[arow]; 3263 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3264 bnzi += nlnk; 3265 /* add received col data into lnk */ 3266 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3267 if (i == *nextrow[k]) { /* i-th row */ 3268 anzi = *(nextai[k]+1) - *nextai[k]; 3269 aj = buf_rj[k] + *nextai[k]; 3270 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3271 bnzi += nlnk; 3272 nextrow[k]++; nextai[k]++; 3273 } 3274 } 3275 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3276 3277 /* if free space is not available, make more free space */ 3278 if (current_space->local_remaining<bnzi) { 3279 ierr = PetscFreeSpaceGet(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3280 nspacedouble++; 3281 } 3282 /* copy data into free space, then initialize lnk */ 3283 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3284 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3285 3286 current_space->array += bnzi; 3287 current_space->local_used += bnzi; 3288 current_space->local_remaining -= bnzi; 3289 3290 bi[i+1] = bi[i] + bnzi; 3291 } 3292 3293 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3294 3295 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3296 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3297 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3298 3299 /* create symbolic parallel matrix B_mpi */ 3300 /*---------------------------------------*/ 3301 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 3302 if (n==PETSC_DECIDE) { 3303 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 3304 } else { 3305 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3306 } 3307 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3308 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3309 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3310 3311 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3312 B_mpi->assembled = PETSC_FALSE; 3313 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3314 merge->bi = bi; 3315 merge->bj = bj; 3316 merge->buf_ri = buf_ri; 3317 merge->buf_rj = buf_rj; 3318 merge->coi = PETSC_NULL; 3319 merge->coj = PETSC_NULL; 3320 merge->owners_co = PETSC_NULL; 3321 3322 /* attach the supporting struct to B_mpi for reuse */ 3323 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3324 ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr); 3325 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3326 *mpimat = B_mpi; 3327 3328 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 3329 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3330 PetscFunctionReturn(0); 3331 } 3332 3333 static PetscEvent logkey_seqstompi = 0; 3334 #undef __FUNCT__ 3335 #define __FUNCT__ "MatMerge_SeqsToMPI" 3336 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3337 { 3338 PetscErrorCode ierr; 3339 3340 PetscFunctionBegin; 3341 if (!logkey_seqstompi) { 3342 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3343 } 3344 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3345 if (scall == MAT_INITIAL_MATRIX){ 3346 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3347 } 3348 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3349 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3350 PetscFunctionReturn(0); 3351 } 3352 static PetscEvent logkey_getlocalmat = 0; 3353 #undef __FUNCT__ 3354 #define __FUNCT__ "MatGetLocalMat" 3355 /*@C 3356 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3357 3358 Not Collective 3359 3360 Input Parameters: 3361 + A - the matrix 3362 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3363 3364 Output Parameter: 3365 . A_loc - the local sequential matrix generated 3366 3367 Level: developer 3368 3369 @*/ 3370 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3371 { 3372 PetscErrorCode ierr; 3373 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3374 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3375 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3376 PetscScalar *aa=a->a,*ba=b->a,*ca; 3377 PetscInt am=A->m,i,j,k,cstart=mpimat->cstart; 3378 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3379 3380 PetscFunctionBegin; 3381 if (!logkey_getlocalmat) { 3382 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3383 } 3384 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3385 if (scall == MAT_INITIAL_MATRIX){ 3386 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 3387 ci[0] = 0; 3388 for (i=0; i<am; i++){ 3389 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3390 } 3391 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3392 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 3393 k = 0; 3394 for (i=0; i<am; i++) { 3395 ncols_o = bi[i+1] - bi[i]; 3396 ncols_d = ai[i+1] - ai[i]; 3397 /* off-diagonal portion of A */ 3398 for (jo=0; jo<ncols_o; jo++) { 3399 col = cmap[*bj]; 3400 if (col >= cstart) break; 3401 cj[k] = col; bj++; 3402 ca[k++] = *ba++; 3403 } 3404 /* diagonal portion of A */ 3405 for (j=0; j<ncols_d; j++) { 3406 cj[k] = cstart + *aj++; 3407 ca[k++] = *aa++; 3408 } 3409 /* off-diagonal portion of A */ 3410 for (j=jo; j<ncols_o; j++) { 3411 cj[k] = cmap[*bj++]; 3412 ca[k++] = *ba++; 3413 } 3414 } 3415 /* put together the new matrix */ 3416 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 3417 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3418 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3419 mat = (Mat_SeqAIJ*)(*A_loc)->data; 3420 mat->freedata = PETSC_TRUE; 3421 mat->nonew = 0; 3422 } else if (scall == MAT_REUSE_MATRIX){ 3423 mat=(Mat_SeqAIJ*)(*A_loc)->data; 3424 ci = mat->i; cj = mat->j; ca = mat->a; 3425 for (i=0; i<am; i++) { 3426 /* off-diagonal portion of A */ 3427 ncols_o = bi[i+1] - bi[i]; 3428 for (jo=0; jo<ncols_o; jo++) { 3429 col = cmap[*bj]; 3430 if (col >= cstart) break; 3431 *ca++ = *ba++; bj++; 3432 } 3433 /* diagonal portion of A */ 3434 ncols_d = ai[i+1] - ai[i]; 3435 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 3436 /* off-diagonal portion of A */ 3437 for (j=jo; j<ncols_o; j++) { 3438 *ca++ = *ba++; bj++; 3439 } 3440 } 3441 } else { 3442 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3443 } 3444 3445 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3446 PetscFunctionReturn(0); 3447 } 3448 3449 static PetscEvent logkey_getlocalmatcondensed = 0; 3450 #undef __FUNCT__ 3451 #define __FUNCT__ "MatGetLocalMatCondensed" 3452 /*@C 3453 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 3454 3455 Not Collective 3456 3457 Input Parameters: 3458 + A - the matrix 3459 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3460 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 3461 3462 Output Parameter: 3463 . A_loc - the local sequential matrix generated 3464 3465 Level: developer 3466 3467 @*/ 3468 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 3469 { 3470 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3471 PetscErrorCode ierr; 3472 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 3473 IS isrowa,iscola; 3474 Mat *aloc; 3475 3476 PetscFunctionBegin; 3477 if (!logkey_getlocalmatcondensed) { 3478 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE); 3479 } 3480 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3481 if (!row){ 3482 start = a->rstart; end = a->rend; 3483 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 3484 } else { 3485 isrowa = *row; 3486 } 3487 if (!col){ 3488 start = a->cstart; 3489 cmap = a->garray; 3490 nzA = a->A->n; 3491 nzB = a->B->n; 3492 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3493 ncols = 0; 3494 for (i=0; i<nzB; i++) { 3495 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3496 else break; 3497 } 3498 imark = i; 3499 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 3500 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 3501 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 3502 ierr = PetscFree(idx);CHKERRQ(ierr); 3503 } else { 3504 iscola = *col; 3505 } 3506 if (scall != MAT_INITIAL_MATRIX){ 3507 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 3508 aloc[0] = *A_loc; 3509 } 3510 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 3511 *A_loc = aloc[0]; 3512 ierr = PetscFree(aloc);CHKERRQ(ierr); 3513 if (!row){ 3514 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 3515 } 3516 if (!col){ 3517 ierr = ISDestroy(iscola);CHKERRQ(ierr); 3518 } 3519 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3520 PetscFunctionReturn(0); 3521 } 3522 3523 static PetscEvent logkey_GetBrowsOfAcols = 0; 3524 #undef __FUNCT__ 3525 #define __FUNCT__ "MatGetBrowsOfAcols" 3526 /*@C 3527 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 3528 3529 Collective on Mat 3530 3531 Input Parameters: 3532 + A,B - the matrices in mpiaij format 3533 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3534 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 3535 3536 Output Parameter: 3537 + rowb, colb - index sets of rows and columns of B to extract 3538 . brstart - row index of B_seq from which next B->m rows are taken from B's local rows 3539 - B_seq - the sequential matrix generated 3540 3541 Level: developer 3542 3543 @*/ 3544 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 3545 { 3546 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3547 PetscErrorCode ierr; 3548 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 3549 IS isrowb,iscolb; 3550 Mat *bseq; 3551 3552 PetscFunctionBegin; 3553 if (a->cstart != b->rstart || a->cend != b->rend){ 3554 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend); 3555 } 3556 if (!logkey_GetBrowsOfAcols) { 3557 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 3558 } 3559 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3560 3561 if (scall == MAT_INITIAL_MATRIX){ 3562 start = a->cstart; 3563 cmap = a->garray; 3564 nzA = a->A->n; 3565 nzB = a->B->n; 3566 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3567 ncols = 0; 3568 for (i=0; i<nzB; i++) { /* row < local row index */ 3569 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3570 else break; 3571 } 3572 imark = i; 3573 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 3574 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 3575 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 3576 ierr = PetscFree(idx);CHKERRQ(ierr); 3577 *brstart = imark; 3578 ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr); 3579 } else { 3580 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 3581 isrowb = *rowb; iscolb = *colb; 3582 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 3583 bseq[0] = *B_seq; 3584 } 3585 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 3586 *B_seq = bseq[0]; 3587 ierr = PetscFree(bseq);CHKERRQ(ierr); 3588 if (!rowb){ 3589 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 3590 } else { 3591 *rowb = isrowb; 3592 } 3593 if (!colb){ 3594 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 3595 } else { 3596 *colb = iscolb; 3597 } 3598 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3599 PetscFunctionReturn(0); 3600 } 3601 3602 static PetscEvent logkey_GetBrowsOfAocols = 0; 3603 #undef __FUNCT__ 3604 #define __FUNCT__ "MatGetBrowsOfAoCols" 3605 /*@C 3606 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 3607 of the OFF-DIAGONAL portion of local A 3608 3609 Collective on Mat 3610 3611 Input Parameters: 3612 + A,B - the matrices in mpiaij format 3613 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3614 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 3615 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 3616 3617 Output Parameter: 3618 + B_oth - the sequential matrix generated 3619 3620 Level: developer 3621 3622 @*/ 3623 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 3624 { 3625 VecScatter_MPI_General *gen_to,*gen_from; 3626 PetscErrorCode ierr; 3627 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3628 Mat_SeqAIJ *b_oth; 3629 VecScatter ctx=a->Mvctx; 3630 MPI_Comm comm=ctx->comm; 3631 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank; 3632 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj; 3633 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 3634 PetscInt i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 3635 MPI_Request *rwaits,*swaits; 3636 MPI_Status *sstatus,rstatus; 3637 PetscInt *cols; 3638 PetscScalar *vals; 3639 PetscMPIInt j; 3640 3641 PetscFunctionBegin; 3642 if (a->cstart != b->rstart || a->cend != b->rend){ 3643 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend); 3644 } 3645 if (!logkey_GetBrowsOfAocols) { 3646 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE); 3647 } 3648 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3649 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3650 3651 gen_to = (VecScatter_MPI_General*)ctx->todata; 3652 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 3653 rvalues = gen_from->values; /* holds the length of sending row */ 3654 svalues = gen_to->values; /* holds the length of receiving row */ 3655 nrecvs = gen_from->n; 3656 nsends = gen_to->n; 3657 rwaits = gen_from->requests; 3658 swaits = gen_to->requests; 3659 srow = gen_to->indices; /* local row index to be sent */ 3660 rstarts = gen_from->starts; 3661 sstarts = gen_to->starts; 3662 rprocs = gen_from->procs; 3663 sprocs = gen_to->procs; 3664 sstatus = gen_to->sstatus; 3665 3666 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 3667 if (scall == MAT_INITIAL_MATRIX){ 3668 /* i-array */ 3669 /*---------*/ 3670 /* post receives */ 3671 for (i=0; i<nrecvs; i++){ 3672 rowlen = (PetscInt*)rvalues + rstarts[i]; 3673 nrows = rstarts[i+1]-rstarts[i]; 3674 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3675 } 3676 3677 /* pack the outgoing message */ 3678 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 3679 rstartsj = sstartsj + nsends +1; 3680 sstartsj[0] = 0; rstartsj[0] = 0; 3681 len = 0; /* total length of j or a array to be sent */ 3682 k = 0; 3683 for (i=0; i<nsends; i++){ 3684 rowlen = (PetscInt*)svalues + sstarts[i]; 3685 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3686 for (j=0; j<nrows; j++) { 3687 row = srow[k] + b->rowners[rank]; /* global row idx */ 3688 ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 3689 len += rowlen[j]; 3690 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 3691 k++; 3692 } 3693 ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3694 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 3695 } 3696 /* recvs and sends of i-array are completed */ 3697 i = nrecvs; 3698 while (i--) { 3699 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3700 } 3701 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3702 /* allocate buffers for sending j and a arrays */ 3703 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 3704 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 3705 3706 /* create i-array of B_oth */ 3707 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 3708 b_othi[0] = 0; 3709 len = 0; /* total length of j or a array to be received */ 3710 k = 0; 3711 for (i=0; i<nrecvs; i++){ 3712 rowlen = (PetscInt*)rvalues + rstarts[i]; 3713 nrows = rstarts[i+1]-rstarts[i]; 3714 for (j=0; j<nrows; j++) { 3715 b_othi[k+1] = b_othi[k] + rowlen[j]; 3716 len += rowlen[j]; k++; 3717 } 3718 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 3719 } 3720 3721 /* allocate space for j and a arrrays of B_oth */ 3722 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 3723 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr); 3724 3725 /* j-array */ 3726 /*---------*/ 3727 /* post receives of j-array */ 3728 for (i=0; i<nrecvs; i++){ 3729 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3730 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3731 } 3732 k = 0; 3733 for (i=0; i<nsends; i++){ 3734 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3735 bufJ = bufj+sstartsj[i]; 3736 for (j=0; j<nrows; j++) { 3737 row = srow[k++] + b->rowners[rank]; /* global row idx */ 3738 ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3739 for (l=0; l<ncols; l++){ 3740 *bufJ++ = cols[l]; 3741 } 3742 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3743 } 3744 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3745 } 3746 3747 /* recvs and sends of j-array are completed */ 3748 i = nrecvs; 3749 while (i--) { 3750 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3751 } 3752 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3753 } else if (scall == MAT_REUSE_MATRIX){ 3754 sstartsj = *startsj; 3755 rstartsj = sstartsj + nsends +1; 3756 bufa = *bufa_ptr; 3757 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 3758 b_otha = b_oth->a; 3759 } else { 3760 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 3761 } 3762 3763 /* a-array */ 3764 /*---------*/ 3765 /* post receives of a-array */ 3766 for (i=0; i<nrecvs; i++){ 3767 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3768 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3769 } 3770 k = 0; 3771 for (i=0; i<nsends; i++){ 3772 nrows = sstarts[i+1]-sstarts[i]; 3773 bufA = bufa+sstartsj[i]; 3774 for (j=0; j<nrows; j++) { 3775 row = srow[k++] + b->rowners[rank]; /* global row idx */ 3776 ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3777 for (l=0; l<ncols; l++){ 3778 *bufA++ = vals[l]; 3779 } 3780 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3781 3782 } 3783 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3784 } 3785 /* recvs and sends of a-array are completed */ 3786 i = nrecvs; 3787 while (i--) { 3788 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3789 } 3790 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3791 3792 if (scall == MAT_INITIAL_MATRIX){ 3793 /* put together the new matrix */ 3794 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 3795 3796 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3797 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3798 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 3799 b_oth->freedata = PETSC_TRUE; 3800 b_oth->nonew = 0; 3801 3802 ierr = PetscFree(bufj);CHKERRQ(ierr); 3803 if (!startsj || !bufa_ptr){ 3804 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 3805 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 3806 } else { 3807 *startsj = sstartsj; 3808 *bufa_ptr = bufa; 3809 } 3810 } 3811 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3812 3813 PetscFunctionReturn(0); 3814 } 3815 3816 #undef __FUNCT__ 3817 #define __FUNCT__ "MatGetCommunicationStructs" 3818 /*@C 3819 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 3820 3821 Not Collective 3822 3823 Input Parameters: 3824 . A - The matrix in mpiaij format 3825 3826 Output Parameter: 3827 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 3828 . colmap - A map from global column index to local index into lvec 3829 - multScatter - A scatter from the argument of a matrix-vector product to lvec 3830 3831 Level: developer 3832 3833 @*/ 3834 #if defined (PETSC_USE_CTABLE) 3835 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 3836 #else 3837 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 3838 #endif 3839 { 3840 Mat_MPIAIJ *a; 3841 3842 PetscFunctionBegin; 3843 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 3844 PetscValidPointer(lvec, 2) 3845 PetscValidPointer(colmap, 3) 3846 PetscValidPointer(multScatter, 4) 3847 a = (Mat_MPIAIJ *) A->data; 3848 if (lvec) *lvec = a->lvec; 3849 if (colmap) *colmap = a->colmap; 3850 if (multScatter) *multScatter = a->Mvctx; 3851 PetscFunctionReturn(0); 3852 } 3853 3854 /*MC 3855 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 3856 3857 Options Database Keys: 3858 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 3859 3860 Level: beginner 3861 3862 .seealso: MatCreateMPIAIJ 3863 M*/ 3864 3865 EXTERN_C_BEGIN 3866 #undef __FUNCT__ 3867 #define __FUNCT__ "MatCreate_MPIAIJ" 3868 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 3869 { 3870 Mat_MPIAIJ *b; 3871 PetscErrorCode ierr; 3872 PetscInt i; 3873 PetscMPIInt size; 3874 3875 PetscFunctionBegin; 3876 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 3877 3878 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 3879 B->data = (void*)b; 3880 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3881 B->factor = 0; 3882 B->bs = 1; 3883 B->assembled = PETSC_FALSE; 3884 B->mapping = 0; 3885 3886 B->insertmode = NOT_SET_VALUES; 3887 b->size = size; 3888 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 3889 3890 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 3891 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 3892 3893 /* the information in the maps duplicates the information computed below, eventually 3894 we should remove the duplicate information that is not contained in the maps */ 3895 ierr = PetscMapInitialize(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 3896 ierr = PetscMapInitialize(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 3897 3898 /* build local table of row and column ownerships */ 3899 ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 3900 ierr = PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 3901 b->cowners = b->rowners + b->size + 2; 3902 ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3903 b->rowners[0] = 0; 3904 for (i=2; i<=b->size; i++) { 3905 b->rowners[i] += b->rowners[i-1]; 3906 } 3907 b->rstart = b->rowners[b->rank]; 3908 b->rend = b->rowners[b->rank+1]; 3909 ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3910 b->cowners[0] = 0; 3911 for (i=2; i<=b->size; i++) { 3912 b->cowners[i] += b->cowners[i-1]; 3913 } 3914 b->cstart = b->cowners[b->rank]; 3915 b->cend = b->cowners[b->rank+1]; 3916 3917 /* build cache for off array entries formed */ 3918 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 3919 b->donotstash = PETSC_FALSE; 3920 b->colmap = 0; 3921 b->garray = 0; 3922 b->roworiented = PETSC_TRUE; 3923 3924 /* stuff used for matrix vector multiply */ 3925 b->lvec = PETSC_NULL; 3926 b->Mvctx = PETSC_NULL; 3927 3928 /* stuff for MatGetRow() */ 3929 b->rowindices = 0; 3930 b->rowvalues = 0; 3931 b->getrowactive = PETSC_FALSE; 3932 3933 /* Explicitly create 2 MATSEQAIJ matrices. */ 3934 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3935 ierr = MatSetSizes(b->A,B->m,B->n,B->m,B->n);CHKERRQ(ierr); 3936 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3937 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3938 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3939 ierr = MatSetSizes(b->B,B->m,B->N,B->m,B->N);CHKERRQ(ierr); 3940 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3941 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3942 3943 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3944 "MatStoreValues_MPIAIJ", 3945 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 3946 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3947 "MatRetrieveValues_MPIAIJ", 3948 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 3949 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3950 "MatGetDiagonalBlock_MPIAIJ", 3951 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 3952 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3953 "MatIsTranspose_MPIAIJ", 3954 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 3955 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 3956 "MatMPIAIJSetPreallocation_MPIAIJ", 3957 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 3958 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 3959 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 3960 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 3961 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3962 "MatDiagonalScaleLocal_MPIAIJ", 3963 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 3964 PetscFunctionReturn(0); 3965 } 3966 EXTERN_C_END 3967 3968 /* 3969 Special version for direct calls from Fortran 3970 */ 3971 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3972 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 3973 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3974 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 3975 #endif 3976 3977 /* Change these macros so can be used in void function */ 3978 #undef CHKERRQ 3979 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr) 3980 #undef SETERRQ2 3981 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr) 3982 #undef SETERRQ 3983 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr) 3984 3985 EXTERN_C_BEGIN 3986 #undef __FUNCT__ 3987 #define __FUNCT__ "matsetvaluesmpiaij_" 3988 void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv) 3989 { 3990 Mat mat = *mmat; 3991 PetscInt m = *mm, n = *mn; 3992 InsertMode addv = *maddv; 3993 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3994 PetscScalar value; 3995 PetscErrorCode ierr; 3996 MatPreallocated(mat); 3997 if (mat->insertmode == NOT_SET_VALUES) { 3998 mat->insertmode = addv; 3999 } 4000 #if defined(PETSC_USE_DEBUG) 4001 else if (mat->insertmode != addv) { 4002 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 4003 } 4004 #endif 4005 { 4006 PetscInt i,j,rstart = aij->rstart,rend = aij->rend; 4007 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 4008 PetscTruth roworiented = aij->roworiented; 4009 4010 /* Some Variables required in the macro */ 4011 Mat A = aij->A; 4012 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 4013 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 4014 PetscScalar *aa = a->a; 4015 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 4016 Mat B = aij->B; 4017 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 4018 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 4019 PetscScalar *ba = b->a; 4020 4021 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 4022 PetscInt nonew = a->nonew; 4023 PetscScalar *ap1,*ap2; 4024 4025 PetscFunctionBegin; 4026 for (i=0; i<m; i++) { 4027 if (im[i] < 0) continue; 4028 #if defined(PETSC_USE_DEBUG) 4029 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 4030 #endif 4031 if (im[i] >= rstart && im[i] < rend) { 4032 row = im[i] - rstart; 4033 lastcol1 = -1; 4034 rp1 = aj + ai[row]; 4035 ap1 = aa + ai[row]; 4036 rmax1 = aimax[row]; 4037 nrow1 = ailen[row]; 4038 low1 = 0; 4039 high1 = nrow1; 4040 lastcol2 = -1; 4041 rp2 = bj + bi[row]; 4042 ap2 = ba + bi[row]; 4043 rmax2 = bimax[row]; 4044 nrow2 = bilen[row]; 4045 low2 = 0; 4046 high2 = nrow2; 4047 4048 for (j=0; j<n; j++) { 4049 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 4050 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 4051 if (in[j] >= cstart && in[j] < cend){ 4052 col = in[j] - cstart; 4053 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 4054 } else if (in[j] < 0) continue; 4055 #if defined(PETSC_USE_DEBUG) 4056 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);} 4057 #endif 4058 else { 4059 if (mat->was_assembled) { 4060 if (!aij->colmap) { 4061 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 4062 } 4063 #if defined (PETSC_USE_CTABLE) 4064 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 4065 col--; 4066 #else 4067 col = aij->colmap[in[j]] - 1; 4068 #endif 4069 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 4070 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 4071 col = in[j]; 4072 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 4073 B = aij->B; 4074 b = (Mat_SeqAIJ*)B->data; 4075 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 4076 rp2 = bj + bi[row]; 4077 ap2 = ba + bi[row]; 4078 rmax2 = bimax[row]; 4079 nrow2 = bilen[row]; 4080 low2 = 0; 4081 high2 = nrow2; 4082 bm = aij->B->m; 4083 ba = b->a; 4084 } 4085 } else col = in[j]; 4086 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 4087 } 4088 } 4089 } else { 4090 if (!aij->donotstash) { 4091 if (roworiented) { 4092 if (ignorezeroentries && v[i*n] == 0.0) continue; 4093 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 4094 } else { 4095 if (ignorezeroentries && v[i] == 0.0) continue; 4096 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 4097 } 4098 } 4099 } 4100 }} 4101 PetscFunctionReturnVoid(); 4102 } 4103 EXTERN_C_END 4104