#define PETSCMAT_DLL

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/inline/spops.h"

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each processor
  has an order-N integer array) but it is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
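/*
   Editorial sketch (not part of the library source) of how the colmap built above is
   consulted to translate a global column number into a local column index of the
   off-diagonal block B.  The names gcol and lcol are illustrative only; both storage
   variants keep the index shifted by one so that zero can mean "not present".

     PetscInt gcol = ...;   // a global column number
     PetscInt lcol;         // local column index in aij->B, or negative if absent
   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif

   MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below use exactly this shifted-by-one
   convention when they must decide whether a column already lives in the B block.
*/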
#define CHUNKSIZE 15

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
  if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
  lastcol1 = col; \
  while (high1-low1 > 5) { \
    t = (low1+high1)/2; \
    if (rp1[t] > col) high1 = t; \
    else              low1  = t; \
  } \
  for (_i=low1; _i<high1; _i++) { \
    if (rp1[_i] > col) break; \
    if (rp1[_i] == col) { \
      if (addv == ADD_VALUES) ap1[_i] += value; \
      else                    ap1[_i] = value; \
      goto a_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
  if (nonew == 1) goto a_noinsert; \
  if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(a,1,nrow1,row,col,rmax1,aa,ai,aj,am,rp1,ap1,aimax,nonew); \
  N = nrow1++ - 1; a->nz++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp1[ii+1] = rp1[ii]; \
    ap1[ii+1] = ap1[ii]; \
  } \
  rp1[_i] = col; \
  ap1[_i] = value; \
  a_noinsert: ; \
  ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
  if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
  lastcol2 = col; \
  while (high2-low2 > 5) { \
    t = (low2+high2)/2; \
    if (rp2[t] > col) high2 = t; \
    else              low2  = t; \
  } \
  for (_i=low2; _i<high2; _i++) { \
    if (rp2[_i] > col) break; \
    if (rp2[_i] == col) { \
      if (addv == ADD_VALUES) ap2[_i] += value; \
      else                    ap2[_i] = value; \
      goto b_noinsert; \
    } \
  } \
  if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
  if (nonew == 1) goto b_noinsert; \
  if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
  MatSeqXAIJReallocateAIJ(b,1,nrow2,row,col,rmax2,ba,bi,bj,bm,rp2,ap2,bimax,nonew); \
  N = nrow2++ - 1; b->nz++; \
  /* shift up all the later entries in this row */ \
  for (ii=N; ii>=_i; ii--) { \
    rp2[ii+1] = rp2[ii]; \
    ap2[ii+1] = ap2[ii]; \
  } \
  rp2[_i] = col; \
  ap2[_i] = value; \
  b_noinsert: ; \
  bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
  PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A = aij->A;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)A->data;
  PetscInt    *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar *aa = a->a;
  PetscTruth  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat         B = aij->B;
  Mat_SeqAIJ  *b = (Mat_SeqAIJ*)B->data;
  PetscInt    *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
  PetscScalar *ba = b->a;

  PetscInt    *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt    nonew = a->nonew;
  PetscScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->m;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
  PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
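/*
   A hedged usage sketch (not part of the library source): how a caller typically fills an
   MPIAIJ matrix, which drives MatSetValues_MPIAIJ() above and the assembly routines below.
   The sizes, preallocation counts, and indices are illustrative assumptions.

     Mat            A;
     PetscInt       i = 0, j = 0;            // global indices, possibly owned elsewhere
     PetscScalar    val = 1.0;
     PetscErrorCode ierr;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatSetValues(A,1,&i,1,&j,&val,ADD_VALUES);CHKERRQ(ierr);  // off-process entries are stashed
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);     // ships stashed entries to their owners
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   Entries for rows owned by another process are kept in mat->stash until
   MatAssemblyBegin_MPIAIJ()/MatAssemblyEnd_MPIAIJ() below scatter them to the owners.
*/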
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscLogInfo((aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)aij->A->data,*b = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col,other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  /* reaccess the b because aij->B was changed in MatSetValues() or DisAssemble() */
  b = (Mat_SeqAIJ*)aij->B->data;

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
  b->compressedrow.use = PETSC_TRUE;
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }

  /* used by MatAXPY() */
  a->xtoy = 0; b->xtoy = 0;
  a->XtoY = 0; b->XtoY = 0;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
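/*
   A hedged usage sketch (not part of the library source) of MatZeroRows(), which for MPIAIJ
   matrices ends up in MatZeroRows_MPIAIJ() below.  Rows listed by one process may be owned by
   another, which is why the routine first communicates the row lists.  The row numbers and
   diagonal value here are illustrative.

     PetscInt    rows[2] = {0,5};     // global row numbers to zero (e.g. Dirichlet rows)
     PetscScalar diag    = 1.0;       // value to leave on the diagonal of the zeroed rows
     ierr = MatZeroRows(A,2,rows,diag);CHKERRQ(ierr);
*/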
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
  PetscInt       i,*owners = l->rowners;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory and trigger many mallocs, slowing
     things down.

     Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
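/*
   Editorial summary (not additional library code) of how the products above are formed:
   each process stores its rows as [A | B], where A holds the "diagonal" columns the process
   also owns and B holds the remaining off-diagonal columns in compressed, renumbered form.
   MatMult_MPIAIJ() therefore computes, per process,

       y_local = A * x_local + B * x_ghost

   where x_ghost (a->lvec) is filled by the VecScatter a->Mvctx while A*x_local is computed,
   overlapping communication with computation.  A hedged usage sketch with illustrative sizes:

     Vec x,y;
     ierr = VecCreateMPI(PETSC_COMM_WORLD,PETSC_DECIDE,100,&x);CHKERRQ(ierr);
     ierr = VecDuplicate(x,&y);CHKERRQ(ierr);
     ierr = MatMult(A,x,y);CHKERRQ(ierr);   // dispatches to MatMult_MPIAIJ() above
*/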
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually added
       into yy until the VecScatterEnd() below */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
   This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->M;
    header[2] = mat->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen = mat->m;
    ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* the 0th processor needs space as large as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
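/*
   A hedged usage sketch (not part of the library source) of writing an assembled MPIAIJ
   matrix through the binary viewer, which is what invokes MatView_MPIAIJ_Binary() above.
   The file name is illustrative.

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);

   Process 0 gathers the row lengths, column indices, and values from the other processes and
   writes the standard PETSc binary format, so the file can later be read back with MatLoad().
*/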
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    PetscInt    M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m    = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = aij->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = aij->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}
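/*
   Editorial summary (not additional library code) of the relaxation routine that follows:
   MatRelax_MPIAIJ() implements only "local" SOR (block Jacobi with SOR inside each block).
   In every outer iteration the off-process coupling is moved to the right-hand side and a
   sequential sweep is applied to the diagonal block,

       bb1 = bb - B * x_ghost,     then relax   A * x_local ~ bb1 .

   A truly coupled parallel SOR is not supported, as the error at the end of the routine states.
*/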
#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       first,local_size,nrows,*rows;
  int            ntids;
  IS             crowp,growp,irowp,lrowp,lcolp,icolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
  }
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
  ierr = ISSetPermutation(growp);CHKERRQ(ierr);
  if (pcomm!=comm) {
    ierr = ISDestroy(crowp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  /* get the local target indices */
  ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISDestroy(irowp);CHKERRQ(ierr);
  /* the column permutation is so much easier;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&ntids);CHKERRQ(ierr);
  if (ntids==1) {
    lcolp = colp;
  } else {
    ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  if (ntids>1) {
    ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISDestroy(lcolp);CHKERRQ(ierr);
  }
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  /* clean up */
  ierr = ISDestroy(lrowp);CHKERRQ(ierr);
  ierr = ISDestroy(icolp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  info->columns_local     = (double)matin->N;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    ierr = PetscLogInfo((A,"MatSetOption_MPIAIJ:Option ignored\n"));CHKERRQ(ierr);
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}
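/*
   A hedged usage sketch (not part of the library source) of MatGetRow()/MatRestoreRow(),
   which for MPIAIJ matrices call the routines below; only locally owned rows may be
   requested, and the returned column numbers are global and ascending.  The row number is
   illustrative, and the exact const qualifiers of the output arrays follow the petscmat.h
   of the installed version.

     PetscInt    row = rstart;   // any row with rstart <= row < rend on this process
     PetscInt    ncols,*cols;
     PetscScalar *vals;
     ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     // ... inspect ncols, cols[], vals[] ...
     ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
*/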
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = aij->cstart;
  PetscReal      sum = 0.0;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr  = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr  = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->m; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat            B;
  PetscScalar    *array;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  }

  ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->n,A->m,N,M);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*)a->A->data;
  m    = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row  = a->rstart;
  for (i=0; i<ai[m]; i++) {aj[i] += a->cstart;}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*)a->B->data;
  m    = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row  = a->rstart;
  ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
  ct   = cols;
  for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  ierr = PetscFree(ct);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPrintHelp_MPIAIJ"
PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetBlockSize_MPIAIJ"
PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
  ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIAIJ"
PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
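/*
   A hedged usage sketch (not part of the library source) of MatAXPY(), which the routine
   below handles for MPIAIJ matrices; when both matrices share the same nonzero pattern the
   update reduces to two BLAS axpy calls on the stored value arrays.  The scalar is illustrative.

     PetscScalar alpha = 2.0;
     ierr = MatAXPY(Y,alpha,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);   // Y = Y + alpha*X
*/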
If need be, we can provide a more 1596 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1597 then copying the submatrices */ 1598 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1599 } else { 1600 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1601 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1602 } 1603 PetscFunctionReturn(0); 1604 } 1605 1606 #undef __FUNCT__ 1607 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1608 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1609 { 1610 PetscErrorCode ierr; 1611 1612 PetscFunctionBegin; 1613 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1614 PetscFunctionReturn(0); 1615 } 1616 1617 #include "petscblaslapack.h" 1618 #undef __FUNCT__ 1619 #define __FUNCT__ "MatAXPY_MPIAIJ" 1620 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1621 { 1622 PetscErrorCode ierr; 1623 PetscInt i; 1624 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1625 PetscBLASInt bnz,one=1; 1626 Mat_SeqAIJ *x,*y; 1627 1628 PetscFunctionBegin; 1629 if (str == SAME_NONZERO_PATTERN) { 1630 PetscScalar alpha = a; 1631 x = (Mat_SeqAIJ *)xx->A->data; 1632 y = (Mat_SeqAIJ *)yy->A->data; 1633 bnz = (PetscBLASInt)x->nz; 1634 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1635 x = (Mat_SeqAIJ *)xx->B->data; 1636 y = (Mat_SeqAIJ *)yy->B->data; 1637 bnz = (PetscBLASInt)x->nz; 1638 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1639 } else if (str == SUBSET_NONZERO_PATTERN) { 1640 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 1641 1642 x = (Mat_SeqAIJ *)xx->B->data; 1643 y = (Mat_SeqAIJ *)yy->B->data; 1644 if (y->xtoy && y->XtoY != xx->B) { 1645 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1646 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1647 } 1648 if (!y->xtoy) { /* get xtoy */ 1649 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1650 y->XtoY = xx->B; 1651 } 1652 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 1653 } else { 1654 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1655 } 1656 PetscFunctionReturn(0); 1657 } 1658 1659 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat); 1660 1661 #undef __FUNCT__ 1662 #define __FUNCT__ "MatConjugate_MPIAIJ" 1663 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat) 1664 { 1665 #if defined(PETSC_USE_COMPLEX) 1666 PetscErrorCode ierr; 1667 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1668 1669 PetscFunctionBegin; 1670 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 1671 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 1672 #else 1673 PetscFunctionBegin; 1674 #endif 1675 PetscFunctionReturn(0); 1676 } 1677 1678 /* -------------------------------------------------------------------*/ 1679 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1680 MatGetRow_MPIAIJ, 1681 MatRestoreRow_MPIAIJ, 1682 MatMult_MPIAIJ, 1683 /* 4*/ MatMultAdd_MPIAIJ, 1684 MatMultTranspose_MPIAIJ, 1685 MatMultTransposeAdd_MPIAIJ, 1686 0, 1687 0, 1688 0, 1689 /*10*/ 0, 1690 0, 1691 0, 1692 MatRelax_MPIAIJ, 1693 MatTranspose_MPIAIJ, 1694 /*15*/ MatGetInfo_MPIAIJ, 1695 MatEqual_MPIAIJ, 1696 MatGetDiagonal_MPIAIJ, 1697 MatDiagonalScale_MPIAIJ, 1698 MatNorm_MPIAIJ, 1699 /*20*/ MatAssemblyBegin_MPIAIJ, 1700 MatAssemblyEnd_MPIAIJ, 1701 0, 1702 MatSetOption_MPIAIJ, 1703 MatZeroEntries_MPIAIJ, 1704 /*25*/ MatZeroRows_MPIAIJ, 1705 0, 1706 0, 1707 0, 1708 0, 1709 /*30*/ MatSetUpPreallocation_MPIAIJ, 1710 0, 1711 0, 1712 0, 1713 0, 1714 /*35*/ MatDuplicate_MPIAIJ, 1715 0, 1716 0, 1717 0, 1718 
0, 1719 /*40*/ MatAXPY_MPIAIJ, 1720 MatGetSubMatrices_MPIAIJ, 1721 MatIncreaseOverlap_MPIAIJ, 1722 MatGetValues_MPIAIJ, 1723 MatCopy_MPIAIJ, 1724 /*45*/ MatPrintHelp_MPIAIJ, 1725 MatScale_MPIAIJ, 1726 0, 1727 0, 1728 0, 1729 /*50*/ MatSetBlockSize_MPIAIJ, 1730 0, 1731 0, 1732 0, 1733 0, 1734 /*55*/ MatFDColoringCreate_MPIAIJ, 1735 0, 1736 MatSetUnfactored_MPIAIJ, 1737 MatPermute_MPIAIJ, 1738 0, 1739 /*60*/ MatGetSubMatrix_MPIAIJ, 1740 MatDestroy_MPIAIJ, 1741 MatView_MPIAIJ, 1742 MatGetPetscMaps_Petsc, 1743 0, 1744 /*65*/ 0, 1745 0, 1746 0, 1747 0, 1748 0, 1749 /*70*/ 0, 1750 0, 1751 MatSetColoring_MPIAIJ, 1752 #if defined(PETSC_HAVE_ADIC) 1753 MatSetValuesAdic_MPIAIJ, 1754 #else 1755 0, 1756 #endif 1757 MatSetValuesAdifor_MPIAIJ, 1758 /*75*/ 0, 1759 0, 1760 0, 1761 0, 1762 0, 1763 /*80*/ 0, 1764 0, 1765 0, 1766 0, 1767 /*84*/ MatLoad_MPIAIJ, 1768 0, 1769 0, 1770 0, 1771 0, 1772 0, 1773 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1774 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1775 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1776 MatPtAP_Basic, 1777 MatPtAPSymbolic_MPIAIJ, 1778 /*95*/ MatPtAPNumeric_MPIAIJ, 1779 0, 1780 0, 1781 0, 1782 0, 1783 /*100*/0, 1784 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1785 MatPtAPNumeric_MPIAIJ_MPIAIJ, 1786 MatConjugate_MPIAIJ, 1787 0, 1788 /*105*/MatSetValuesRow_MPIAIJ 1789 }; 1790 1791 /* ----------------------------------------------------------------------------------------*/ 1792 1793 EXTERN_C_BEGIN 1794 #undef __FUNCT__ 1795 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1796 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 1797 { 1798 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1799 PetscErrorCode ierr; 1800 1801 PetscFunctionBegin; 1802 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1803 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1804 PetscFunctionReturn(0); 1805 } 1806 EXTERN_C_END 1807 1808 EXTERN_C_BEGIN 1809 #undef __FUNCT__ 1810 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1811 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 1812 { 1813 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1814 PetscErrorCode ierr; 1815 1816 PetscFunctionBegin; 1817 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1818 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1819 PetscFunctionReturn(0); 1820 } 1821 EXTERN_C_END 1822 1823 #include "petscpc.h" 1824 EXTERN_C_BEGIN 1825 #undef __FUNCT__ 1826 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1827 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1828 { 1829 Mat_MPIAIJ *b; 1830 PetscErrorCode ierr; 1831 PetscInt i; 1832 1833 PetscFunctionBegin; 1834 B->preallocated = PETSC_TRUE; 1835 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1836 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1837 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1838 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1839 if (d_nnz) { 1840 for (i=0; i<B->m; i++) { 1841 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1842 } 1843 } 1844 if (o_nnz) { 1845 for (i=0; i<B->m; i++) { 1846 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1847 } 1848 } 1849 b = (Mat_MPIAIJ*)B->data; 1850 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1851 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 
1852 1853 PetscFunctionReturn(0); 1854 } 1855 EXTERN_C_END 1856 1857 #undef __FUNCT__ 1858 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1859 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1860 { 1861 Mat mat; 1862 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1863 PetscErrorCode ierr; 1864 1865 PetscFunctionBegin; 1866 *newmat = 0; 1867 ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr); 1868 ierr = MatSetSizes(mat,matin->m,matin->n,matin->M,matin->N);CHKERRQ(ierr); 1869 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1870 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1871 a = (Mat_MPIAIJ*)mat->data; 1872 1873 mat->factor = matin->factor; 1874 mat->bs = matin->bs; 1875 mat->assembled = PETSC_TRUE; 1876 mat->insertmode = NOT_SET_VALUES; 1877 mat->preallocated = PETSC_TRUE; 1878 1879 a->rstart = oldmat->rstart; 1880 a->rend = oldmat->rend; 1881 a->cstart = oldmat->cstart; 1882 a->cend = oldmat->cend; 1883 a->size = oldmat->size; 1884 a->rank = oldmat->rank; 1885 a->donotstash = oldmat->donotstash; 1886 a->roworiented = oldmat->roworiented; 1887 a->rowindices = 0; 1888 a->rowvalues = 0; 1889 a->getrowactive = PETSC_FALSE; 1890 1891 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 1892 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1893 if (oldmat->colmap) { 1894 #if defined (PETSC_USE_CTABLE) 1895 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1896 #else 1897 ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1898 ierr = PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1899 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1900 #endif 1901 } else a->colmap = 0; 1902 if (oldmat->garray) { 1903 PetscInt len; 1904 len = oldmat->B->n; 1905 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1906 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 1907 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 1908 } else a->garray = 0; 1909 1910 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1911 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 1912 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1913 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 1914 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1915 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1916 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1917 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1918 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1919 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 1920 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1921 *newmat = mat; 1922 PetscFunctionReturn(0); 1923 } 1924 1925 #include "petscsys.h" 1926 1927 #undef __FUNCT__ 1928 #define __FUNCT__ "MatLoad_MPIAIJ" 1929 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat) 1930 { 1931 Mat A; 1932 PetscScalar *vals,*svals; 1933 MPI_Comm comm = ((PetscObject)viewer)->comm; 1934 MPI_Status status; 1935 PetscErrorCode ierr; 1936 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 1937 PetscInt i,nz,j,rstart,rend,mmax; 1938 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 1939 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1940 PetscInt cend,cstart,n,*rowners; 1941 int fd; 1942 1943 PetscFunctionBegin; 1944 ierr = 
MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1945 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1946 if (!rank) { 1947 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1948 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1949 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1950 } 1951 1952 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1953 M = header[1]; N = header[2]; 1954 /* determine ownership of all rows */ 1955 m = M/size + ((M % size) > rank); 1956 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1957 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1958 1959 /* First process needs enough room for process with most rows */ 1960 if (!rank) { 1961 mmax = rowners[1]; 1962 for (i=2; i<size; i++) { 1963 mmax = PetscMax(mmax,rowners[i]); 1964 } 1965 } else mmax = m; 1966 1967 rowners[0] = 0; 1968 for (i=2; i<=size; i++) { 1969 mmax = PetscMax(mmax,rowners[i]); 1970 rowners[i] += rowners[i-1]; 1971 } 1972 rstart = rowners[rank]; 1973 rend = rowners[rank+1]; 1974 1975 /* distribute row lengths to all processors */ 1976 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 1977 if (!rank) { 1978 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 1979 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1980 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 1981 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1982 for (j=0; j<m; j++) { 1983 procsnz[0] += ourlens[j]; 1984 } 1985 for (i=1; i<size; i++) { 1986 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 1987 /* calculate the number of nonzeros on each processor */ 1988 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 1989 procsnz[i] += rowlengths[j]; 1990 } 1991 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1992 } 1993 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1994 } else { 1995 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1996 } 1997 1998 if (!rank) { 1999 /* determine max buffer needed and allocate it */ 2000 maxnz = 0; 2001 for (i=0; i<size; i++) { 2002 maxnz = PetscMax(maxnz,procsnz[i]); 2003 } 2004 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2005 2006 /* read in my part of the matrix column indices */ 2007 nz = procsnz[0]; 2008 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2009 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2010 2011 /* read in every one elses and ship off */ 2012 for (i=1; i<size; i++) { 2013 nz = procsnz[i]; 2014 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2015 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2016 } 2017 ierr = PetscFree(cols);CHKERRQ(ierr); 2018 } else { 2019 /* determine buffer space needed for message */ 2020 nz = 0; 2021 for (i=0; i<m; i++) { 2022 nz += ourlens[i]; 2023 } 2024 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2025 2026 /* receive message of column indices*/ 2027 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2028 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2029 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2030 } 2031 2032 /* determine column ownership if matrix is not square */ 2033 if (N != M) { 2034 n = N/size + ((N % size) > rank); 2035 ierr = 
MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2036 cstart = cend - n; 2037 } else { 2038 cstart = rstart; 2039 cend = rend; 2040 n = cend - cstart; 2041 } 2042 2043 /* loop over local rows, determining number of off diagonal entries */ 2044 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2045 jj = 0; 2046 for (i=0; i<m; i++) { 2047 for (j=0; j<ourlens[i]; j++) { 2048 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2049 jj++; 2050 } 2051 } 2052 2053 /* create our matrix */ 2054 for (i=0; i<m; i++) { 2055 ourlens[i] -= offlens[i]; 2056 } 2057 ierr = MatCreate(comm,&A);CHKERRQ(ierr); 2058 ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr); 2059 ierr = MatSetType(A,type);CHKERRQ(ierr); 2060 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2061 2062 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2063 for (i=0; i<m; i++) { 2064 ourlens[i] += offlens[i]; 2065 } 2066 2067 if (!rank) { 2068 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2069 2070 /* read in my part of the matrix numerical values */ 2071 nz = procsnz[0]; 2072 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2073 2074 /* insert into matrix */ 2075 jj = rstart; 2076 smycols = mycols; 2077 svals = vals; 2078 for (i=0; i<m; i++) { 2079 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2080 smycols += ourlens[i]; 2081 svals += ourlens[i]; 2082 jj++; 2083 } 2084 2085 /* read in other processors and ship out */ 2086 for (i=1; i<size; i++) { 2087 nz = procsnz[i]; 2088 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2089 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2090 } 2091 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2092 } else { 2093 /* receive numeric values */ 2094 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2095 2096 /* receive message of values*/ 2097 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2098 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2099 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2100 2101 /* insert into matrix */ 2102 jj = rstart; 2103 smycols = mycols; 2104 svals = vals; 2105 for (i=0; i<m; i++) { 2106 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2107 smycols += ourlens[i]; 2108 svals += ourlens[i]; 2109 jj++; 2110 } 2111 } 2112 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2113 ierr = PetscFree(vals);CHKERRQ(ierr); 2114 ierr = PetscFree(mycols);CHKERRQ(ierr); 2115 ierr = PetscFree(rowners);CHKERRQ(ierr); 2116 2117 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2118 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2119 *newmat = A; 2120 PetscFunctionReturn(0); 2121 } 2122 2123 #undef __FUNCT__ 2124 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2125 /* 2126 Not great since it makes two copies of the submatrix, first an SeqAIJ 2127 in local and then by concatenating the local matrices the end result. 
2128 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2129 */ 2130 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2131 { 2132 PetscErrorCode ierr; 2133 PetscMPIInt rank,size; 2134 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2135 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2136 Mat *local,M,Mreuse; 2137 PetscScalar *vwork,*aa; 2138 MPI_Comm comm = mat->comm; 2139 Mat_SeqAIJ *aij; 2140 2141 2142 PetscFunctionBegin; 2143 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2144 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2145 2146 if (call == MAT_REUSE_MATRIX) { 2147 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2148 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2149 local = &Mreuse; 2150 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2151 } else { 2152 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2153 Mreuse = *local; 2154 ierr = PetscFree(local);CHKERRQ(ierr); 2155 } 2156 2157 /* 2158 m - number of local rows 2159 n - number of columns (same on all processors) 2160 rstart - first row in new global matrix generated 2161 */ 2162 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2163 if (call == MAT_INITIAL_MATRIX) { 2164 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2165 ii = aij->i; 2166 jj = aij->j; 2167 2168 /* 2169 Determine the number of non-zeros in the diagonal and off-diagonal 2170 portions of the matrix in order to do correct preallocation 2171 */ 2172 2173 /* first get start and end of "diagonal" columns */ 2174 if (csize == PETSC_DECIDE) { 2175 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2176 if (mglobal == n) { /* square matrix */ 2177 nlocal = m; 2178 } else { 2179 nlocal = n/size + ((n % size) > rank); 2180 } 2181 } else { 2182 nlocal = csize; 2183 } 2184 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2185 rstart = rend - nlocal; 2186 if (rank == size - 1 && rend != n) { 2187 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2188 } 2189 2190 /* next, compute all the lengths */ 2191 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2192 olens = dlens + m; 2193 for (i=0; i<m; i++) { 2194 jend = ii[i+1] - ii[i]; 2195 olen = 0; 2196 dlen = 0; 2197 for (j=0; j<jend; j++) { 2198 if (*jj < rstart || *jj >= rend) olen++; 2199 else dlen++; 2200 jj++; 2201 } 2202 olens[i] = olen; 2203 dlens[i] = dlen; 2204 } 2205 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 2206 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 2207 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2208 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2209 ierr = PetscFree(dlens);CHKERRQ(ierr); 2210 } else { 2211 PetscInt ml,nl; 2212 2213 M = *newmat; 2214 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2215 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2216 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2217 /* 2218 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2219 rather than the slower MatSetValues(). 
2220 */ 2221 M->was_assembled = PETSC_TRUE; 2222 M->assembled = PETSC_FALSE; 2223 } 2224 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2225 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2226 ii = aij->i; 2227 jj = aij->j; 2228 aa = aij->a; 2229 for (i=0; i<m; i++) { 2230 row = rstart + i; 2231 nz = ii[i+1] - ii[i]; 2232 cwork = jj; jj += nz; 2233 vwork = aa; aa += nz; 2234 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2235 } 2236 2237 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2238 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2239 *newmat = M; 2240 2241 /* save submatrix used in processor for next request */ 2242 if (call == MAT_INITIAL_MATRIX) { 2243 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2244 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2245 } 2246 2247 PetscFunctionReturn(0); 2248 } 2249 2250 EXTERN_C_BEGIN 2251 #undef __FUNCT__ 2252 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2253 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2254 { 2255 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2256 PetscInt m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d; 2257 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii; 2258 const PetscInt *JJ; 2259 PetscScalar *values; 2260 PetscErrorCode ierr; 2261 2262 PetscFunctionBegin; 2263 if (I[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"I[0] must be 0 it is %D",I[0]); 2264 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2265 o_nnz = d_nnz + m; 2266 2267 for (i=0; i<m; i++) { 2268 nnz = I[i+1]- I[i]; 2269 JJ = J + I[i]; 2270 nnz_max = PetscMax(nnz_max,nnz); 2271 if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 2272 for (j=0; j<nnz; j++) { 2273 if (*JJ >= cstart) break; 2274 JJ++; 2275 } 2276 d = 0; 2277 for (; j<nnz; j++) { 2278 if (*JJ++ >= cend) break; 2279 d++; 2280 } 2281 d_nnz[i] = d; 2282 o_nnz[i] = nnz - d; 2283 } 2284 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2285 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2286 2287 if (v) values = (PetscScalar*)v; 2288 else { 2289 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2290 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2291 } 2292 2293 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2294 for (i=0; i<m; i++) { 2295 ii = i + rstart; 2296 nnz = I[i+1]- I[i]; 2297 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values+(v ? I[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 2298 } 2299 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2300 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2301 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2302 2303 if (!v) { 2304 ierr = PetscFree(values);CHKERRQ(ierr); 2305 } 2306 PetscFunctionReturn(0); 2307 } 2308 EXTERN_C_END 2309 2310 #undef __FUNCT__ 2311 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2312 /*@C 2313 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2314 (the default parallel PETSc format). 2315 2316 Collective on MPI_Comm 2317 2318 Input Parameters: 2319 + B - the matrix 2320 . i - the indices into j for the start of each local row (starts with zero) 2321 . 
j - the column indices for each local row (starts with zero); these must be sorted within each row 2322 - v - optional values in the matrix 2323 2324 Level: developer 2325 2326 .keywords: matrix, aij, compressed row, sparse, parallel 2327 2328 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2329 @*/ 2330 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2331 { 2332 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2333 2334 PetscFunctionBegin; 2335 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2336 if (f) { 2337 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2338 } 2339 PetscFunctionReturn(0); 2340 } 2341 2342 #undef __FUNCT__ 2343 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2344 /*@C 2345 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2346 (the default parallel PETSc format). For good matrix assembly performance 2347 the user should preallocate the matrix storage by setting the parameters 2348 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2349 performance can be increased by more than a factor of 50. 2350 2351 Collective on MPI_Comm 2352 2353 Input Parameters: 2354 + A - the matrix 2355 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2356 (same value is used for all local rows) 2357 . d_nnz - array containing the number of nonzeros in the various rows of the 2358 DIAGONAL portion of the local submatrix (possibly different for each row) 2359 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2360 The size of this array is equal to the number of local rows, i.e. 'm'. 2361 You must leave room for the diagonal entry even if it is zero. 2362 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2363 submatrix (same value is used for all local rows). 2364 - o_nnz - array containing the number of nonzeros in the various rows of the 2365 OFF-DIAGONAL portion of the local submatrix (possibly different for 2366 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2367 structure. The size of this array is equal to the number 2368 of local rows, i.e. 'm'. 2369 2370 If the *_nnz parameter is given then the *_nz parameter is ignored. 2371 2372 The AIJ format (also called the Yale sparse matrix format or 2373 compressed row storage (CSR)) is fully compatible with standard Fortran 77 2374 storage. The stored row and column indices begin with zero. See the users manual for details. 2375 2376 The parallel matrix is partitioned such that the first m0 rows belong to 2377 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2378 to process 2, etc., where m0,m1,m2,... are the input parameter 'm'. 2379 2380 The DIAGONAL portion of the local submatrix of a processor can be defined 2381 as the submatrix which is obtained by extracting the part corresponding 2382 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2383 first row that belongs to the processor, and r2 is the last row belonging 2384 to this processor. This is a square mxm matrix. The remaining portion 2385 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 2386 2387 If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
2388 2389 Example usage: 2390 2391 Consider the following 8x8 matrix with 34 non-zero values, that is 2392 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2393 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2394 as follows: 2395 2396 .vb 2397 1 2 0 | 0 3 0 | 0 4 2398 Proc0 0 5 6 | 7 0 0 | 8 0 2399 9 0 10 | 11 0 0 | 12 0 2400 ------------------------------------- 2401 13 0 14 | 15 16 17 | 0 0 2402 Proc1 0 18 0 | 19 20 21 | 0 0 2403 0 0 0 | 22 23 0 | 24 0 2404 ------------------------------------- 2405 Proc2 25 26 27 | 0 0 28 | 29 0 2406 30 0 0 | 31 32 33 | 0 34 2407 .ve 2408 2409 This can be represented as a collection of submatrices as: 2410 2411 .vb 2412 A B C 2413 D E F 2414 G H I 2415 .ve 2416 2417 Here the submatrices A,B,C are owned by proc0, D,E,F are 2418 owned by proc1, and G,H,I are owned by proc2. 2419 2420 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2421 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2422 The 'M','N' parameters are 8,8, and have the same values on all procs. 2423 2424 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2425 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2426 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2427 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 2428 part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ 2429 matrix, and [DF] as another SeqAIJ matrix. 2430 2431 When the d_nz, o_nz parameters are specified, d_nz storage elements are 2432 allocated for every row of the local diagonal submatrix, and o_nz 2433 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2434 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 2435 local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices. 2436 In this case, the values of d_nz,o_nz are: 2437 .vb 2438 proc0 : d_nz = 2, o_nz = 2 2439 proc1 : d_nz = 3, o_nz = 2 2440 proc2 : d_nz = 1, o_nz = 4 2441 .ve 2442 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2443 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 2444 for proc2, i.e. we are using 12+15+10=37 storage locations to store 2445 34 values. 2446 2447 When the d_nnz, o_nnz parameters are specified, the storage is specified 2448 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 2449 In the above case the values for d_nnz,o_nnz are: 2450 .vb 2451 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2452 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2453 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2454 .ve 2455 Here the space allocated is the sum of all the above values, i.e. 34, and 2456 hence the preallocation is exact.
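   As a rough sketch of a typical call sequence using the per-row arrays (not a complete
   program; 'comm' is assumed to be the MPI communicator, and the d_nnz/o_nnz values are
   the proc0 counts from the example above):
.vb
      Mat      A;
      PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};

      MatCreate(comm,&A);
      MatSetSizes(A,3,3,PETSC_DETERMINE,PETSC_DETERMINE);
      MatSetType(A,MATMPIAIJ);
      MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve
   followed by the usual MatSetValues() and MatAssemblyBegin()/MatAssemblyEnd() calls.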
2457 2458 Level: intermediate 2459 2460 .keywords: matrix, aij, compressed row, sparse, parallel 2461 2462 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 2463 MPIAIJ 2464 @*/ 2465 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2466 { 2467 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2468 2469 PetscFunctionBegin; 2470 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2471 if (f) { 2472 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2473 } 2474 PetscFunctionReturn(0); 2475 } 2476 2477 #undef __FUNCT__ 2478 #define __FUNCT__ "MatCreateMPIAIJ" 2479 /*@C 2480 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2481 (the default parallel PETSc format). For good matrix assembly performance 2482 the user should preallocate the matrix storage by setting the parameters 2483 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2484 performance can be increased by more than a factor of 50. 2485 2486 Collective on MPI_Comm 2487 2488 Input Parameters: 2489 + comm - MPI communicator 2490 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2491 This value should be the same as the local size used in creating the 2492 y vector for the matrix-vector product y = Ax. 2493 . n - This value should be the same as the local size used in creating the 2494 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2495 calculated if N is given) For square matrices n is almost always m. 2496 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2497 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2498 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2499 (same value is used for all local rows) 2500 . d_nnz - array containing the number of nonzeros in the various rows of the 2501 DIAGONAL portion of the local submatrix (possibly different for each row) 2502 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2503 The size of this array is equal to the number of local rows, i.e 'm'. 2504 You must leave room for the diagonal entry even if it is zero. 2505 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2506 submatrix (same value is used for all local rows). 2507 - o_nnz - array containing the number of nonzeros in the various rows of the 2508 OFF-DIAGONAL portion of the local submatrix (possibly different for 2509 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2510 structure. The size of this array is equal to the number 2511 of local rows, i.e 'm'. 2512 2513 Output Parameter: 2514 . A - the matrix 2515 2516 Notes: 2517 If the *_nnz parameter is given then the *_nz parameter is ignored 2518 2519 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2520 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2521 storage requirements for this matrix. 2522 2523 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2524 processor than it must be used on all processors that share the object for 2525 that argument. 2526 2527 The user MUST specify either the local or global matrix dimensions 2528 (possibly both). 
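   For instance, a minimal creation sketch (illustrative only; PETSC_COMM_WORLD is assumed
   as the communicator) that gives only the global sizes of the 8x8 example below, lets PETSc
   choose the local sizes, and uses uniform per-row upper bounds of 3 diagonal-block and 4
   off-diagonal-block nonzeros is:
.vb
      Mat A;
      MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,8,8,3,PETSC_NULL,4,PETSC_NULL,&A);
.ve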
2529 2530 The parallel matrix is partitioned such that the first m0 rows belong to 2531 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2532 to process 2, etc., where m0,m1,m2,... are the input parameter 'm'. 2533 2534 The DIAGONAL portion of the local submatrix of a processor can be defined 2535 as the submatrix which is obtained by extracting the part corresponding 2536 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2537 first row that belongs to the processor, and r2 is the last row belonging 2538 to this processor. This is a square mxm matrix. The remaining portion 2539 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 2540 2541 If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored. 2542 2543 When calling this routine with a single process communicator, a matrix of 2544 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2545 type of communicator, use the construction mechanism: 2546 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 2547 2548 By default, this format uses inodes (identical nodes) when possible. 2549 We search for consecutive rows with the same nonzero structure, thereby 2550 reusing matrix information to achieve increased efficiency. 2551 2552 Options Database Keys: 2553 + -mat_no_inode - Do not use inodes 2554 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 2555 - -mat_aij_oneindex - Internally use indexing starting at 1 2556 rather than 0. Note that when calling MatSetValues(), 2557 the user still MUST index entries starting at 0! 2558 2559 2560 Example usage: 2561 2562 Consider the following 8x8 matrix with 34 non-zero values, that is 2563 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2564 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2565 as follows: 2566 2567 .vb 2568 1 2 0 | 0 3 0 | 0 4 2569 Proc0 0 5 6 | 7 0 0 | 8 0 2570 9 0 10 | 11 0 0 | 12 0 2571 ------------------------------------- 2572 13 0 14 | 15 16 17 | 0 0 2573 Proc1 0 18 0 | 19 20 21 | 0 0 2574 0 0 0 | 22 23 0 | 24 0 2575 ------------------------------------- 2576 Proc2 25 26 27 | 0 0 28 | 29 0 2577 30 0 0 | 31 32 33 | 0 34 2578 .ve 2579 2580 This can be represented as a collection of submatrices as: 2581 2582 .vb 2583 A B C 2584 D E F 2585 G H I 2586 .ve 2587 2588 Here the submatrices A,B,C are owned by proc0, D,E,F are 2589 owned by proc1, and G,H,I are owned by proc2. 2590 2591 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2592 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2593 The 'M','N' parameters are 8,8, and have the same values on all procs. 2594 2595 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2596 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2597 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2598 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 2599 part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ 2600 matrix, and [DF] as another SeqAIJ matrix. 2601 2602 When the d_nz, o_nz parameters are specified, d_nz storage elements are 2603 allocated for every row of the local diagonal submatrix, and o_nz 2604 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2605 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 2606 local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2607 In this case, the values of d_nz,o_nz are: 2608 .vb 2609 proc0 : dnz = 2, o_nz = 2 2610 proc1 : dnz = 3, o_nz = 2 2611 proc2 : dnz = 1, o_nz = 4 2612 .ve 2613 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2614 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2615 for proc3. i.e we are using 12+15+10=37 storage locations to store 2616 34 values. 2617 2618 When d_nnz, o_nnz parameters are specified, the storage is specified 2619 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2620 In the above case the values for d_nnz,o_nnz are: 2621 .vb 2622 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2623 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2624 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2625 .ve 2626 Here the space allocated is sum of all the above values i.e 34, and 2627 hence pre-allocation is perfect. 2628 2629 Level: intermediate 2630 2631 .keywords: matrix, aij, compressed row, sparse, parallel 2632 2633 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2634 MPIAIJ 2635 @*/ 2636 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2637 { 2638 PetscErrorCode ierr; 2639 PetscMPIInt size; 2640 2641 PetscFunctionBegin; 2642 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2643 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2644 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2645 if (size > 1) { 2646 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2647 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2648 } else { 2649 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2650 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2651 } 2652 PetscFunctionReturn(0); 2653 } 2654 2655 #undef __FUNCT__ 2656 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2657 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2658 { 2659 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2660 2661 PetscFunctionBegin; 2662 *Ad = a->A; 2663 *Ao = a->B; 2664 *colmap = a->garray; 2665 PetscFunctionReturn(0); 2666 } 2667 2668 #undef __FUNCT__ 2669 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2670 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2671 { 2672 PetscErrorCode ierr; 2673 PetscInt i; 2674 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2675 2676 PetscFunctionBegin; 2677 if (coloring->ctype == IS_COLORING_LOCAL) { 2678 ISColoringValue *allcolors,*colors; 2679 ISColoring ocoloring; 2680 2681 /* set coloring for diagonal portion */ 2682 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2683 2684 /* set coloring for off-diagonal portion */ 2685 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2686 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2687 for (i=0; i<a->B->n; i++) { 2688 colors[i] = allcolors[a->garray[i]]; 2689 } 2690 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2691 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2692 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2693 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2694 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2695 ISColoringValue *colors; 2696 PetscInt *larray; 2697 ISColoring ocoloring; 2698 2699 /* set coloring for diagonal portion */ 2700 ierr = 
PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2701 for (i=0; i<a->A->n; i++) { 2702 larray[i] = i + a->cstart; 2703 } 2704 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2705 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2706 for (i=0; i<a->A->n; i++) { 2707 colors[i] = coloring->colors[larray[i]]; 2708 } 2709 ierr = PetscFree(larray);CHKERRQ(ierr); 2710 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2711 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2712 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2713 2714 /* set coloring for off-diagonal portion */ 2715 ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2716 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2717 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2718 for (i=0; i<a->B->n; i++) { 2719 colors[i] = coloring->colors[larray[i]]; 2720 } 2721 ierr = PetscFree(larray);CHKERRQ(ierr); 2722 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2723 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2724 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2725 } else { 2726 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 2727 } 2728 2729 PetscFunctionReturn(0); 2730 } 2731 2732 #if defined(PETSC_HAVE_ADIC) 2733 #undef __FUNCT__ 2734 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2735 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2736 { 2737 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2738 PetscErrorCode ierr; 2739 2740 PetscFunctionBegin; 2741 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2742 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2743 PetscFunctionReturn(0); 2744 } 2745 #endif 2746 2747 #undef __FUNCT__ 2748 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2749 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 2750 { 2751 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2752 PetscErrorCode ierr; 2753 2754 PetscFunctionBegin; 2755 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2756 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2757 PetscFunctionReturn(0); 2758 } 2759 2760 #undef __FUNCT__ 2761 #define __FUNCT__ "MatMerge" 2762 /*@C 2763 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2764 matrices from each processor 2765 2766 Collective on MPI_Comm 2767 2768 Input Parameters: 2769 + comm - the communicators the parallel matrix will live on 2770 . inmat - the input sequential matrices 2771 . n - number of local columns (or PETSC_DECIDE) 2772 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2773 2774 Output Parameter: 2775 . outmat - the parallel matrix generated 2776 2777 Level: advanced 2778 2779 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
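   A typical call (a sketch only; inmat is this process's sequential matrix and
   PETSC_COMM_WORLD is assumed as the target communicator) is:
.vb
      Mat C;
      MatMerge(PETSC_COMM_WORLD,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
.ve
   Note that inmat is destroyed by this routine as part of forming outmat.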
2780 2781 @*/ 2782 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2783 { 2784 PetscErrorCode ierr; 2785 PetscInt m,N,i,rstart,nnz,I,*dnz,*onz; 2786 PetscInt *indx; 2787 PetscScalar *values; 2788 PetscMap columnmap,rowmap; 2789 2790 PetscFunctionBegin; 2791 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2792 /* 2793 PetscMPIInt rank; 2794 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2795 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N); 2796 */ 2797 if (scall == MAT_INITIAL_MATRIX){ 2798 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2799 if (n == PETSC_DECIDE){ 2800 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2801 ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr); 2802 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2803 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2804 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2805 } 2806 2807 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2808 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2809 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2810 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2811 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2812 2813 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2814 for (i=0;i<m;i++) { 2815 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2816 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2817 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2818 } 2819 /* This routine will ONLY return MPIAIJ type matrix */ 2820 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 2821 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2822 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2823 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2824 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2825 2826 } else if (scall == MAT_REUSE_MATRIX){ 2827 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2828 } else { 2829 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 2830 } 2831 2832 for (i=0;i<m;i++) { 2833 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2834 I = i + rstart; 2835 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2836 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2837 } 2838 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2839 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2840 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2841 2842 PetscFunctionReturn(0); 2843 } 2844 2845 #undef __FUNCT__ 2846 #define __FUNCT__ "MatFileSplit" 2847 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2848 { 2849 PetscErrorCode ierr; 2850 PetscMPIInt rank; 2851 PetscInt m,N,i,rstart,nnz; 2852 size_t len; 2853 const PetscInt *indx; 2854 PetscViewer out; 2855 char *name; 2856 Mat B; 2857 const PetscScalar *values; 2858 2859 PetscFunctionBegin; 2860 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2861 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2862 /* Should this be the type of the diagonal block of A? 
*/ 2863 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2864 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 2865 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2866 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2867 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2868 for (i=0;i<m;i++) { 2869 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2870 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2871 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2872 } 2873 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2874 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2875 2876 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2877 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2878 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2879 sprintf(name,"%s.%d",outfile,rank); 2880 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr); 2881 ierr = PetscFree(name); 2882 ierr = MatView(B,out);CHKERRQ(ierr); 2883 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2884 ierr = MatDestroy(B);CHKERRQ(ierr); 2885 PetscFunctionReturn(0); 2886 } 2887 2888 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 2889 #undef __FUNCT__ 2890 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 2891 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 2892 { 2893 PetscErrorCode ierr; 2894 Mat_Merge_SeqsToMPI *merge; 2895 PetscObjectContainer container; 2896 2897 PetscFunctionBegin; 2898 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2899 if (container) { 2900 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2901 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 2902 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 2903 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 2904 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 2905 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 2906 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 2907 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 2908 ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr); 2909 if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);} 2910 if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);} 2911 if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);} 2912 2913 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 2914 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 2915 } 2916 ierr = PetscFree(merge);CHKERRQ(ierr); 2917 2918 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 2919 PetscFunctionReturn(0); 2920 } 2921 2922 #include "src/mat/utils/freespace.h" 2923 #include "petscbt.h" 2924 static PetscEvent logkey_seqstompinum = 0; 2925 #undef __FUNCT__ 2926 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 2927 /*@C 2928 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 2929 matrices from each processor 2930 2931 Collective on MPI_Comm 2932 2933 Input Parameters: 2934 + comm - the communicators the parallel matrix will live on 2935 . seqmat - the input sequential matrices 2936 . m - number of local rows (or PETSC_DECIDE) 2937 . n - number of local columns (or PETSC_DECIDE) 2938 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2939 2940 Output Parameter: 2941 . mpimat - the parallel matrix generated 2942 2943 Level: advanced 2944 2945 Notes: 2946 The dimensions of the sequential matrix in each processor MUST be the same. 
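   The intended two-phase usage (a sketch inferred from the routine signatures in this file;
   the symbolic phase creates mpimat and the numeric phase fills in its values) is:
.vb
      Mat mpimat;
      MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&mpimat);
      MatMerge_SeqsToMPINumeric(seqmat,mpimat);
.ve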
2947 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 2948 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 2949 @*/ 2950 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 2951 { 2952 PetscErrorCode ierr; 2953 MPI_Comm comm=mpimat->comm; 2954 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2955 PetscMPIInt size,rank,taga,*len_s; 2956 PetscInt N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j; 2957 PetscInt proc,m; 2958 PetscInt **buf_ri,**buf_rj; 2959 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 2960 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 2961 MPI_Request *s_waits,*r_waits; 2962 MPI_Status *status; 2963 MatScalar *aa=a->a,**abuf_r,*ba_i; 2964 Mat_Merge_SeqsToMPI *merge; 2965 PetscObjectContainer container; 2966 2967 PetscFunctionBegin; 2968 if (!logkey_seqstompinum) { 2969 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 2970 } 2971 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 2972 2973 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2974 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2975 2976 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2977 if (container) { 2978 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2979 } 2980 bi = merge->bi; 2981 bj = merge->bj; 2982 buf_ri = merge->buf_ri; 2983 buf_rj = merge->buf_rj; 2984 2985 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 2986 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 2987 len_s = merge->len_s; 2988 2989 /* send and recv matrix values */ 2990 /*-----------------------------*/ 2991 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 2992 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 2993 2994 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 2995 for (proc=0,k=0; proc<size; proc++){ 2996 if (!len_s[proc]) continue; 2997 i = owners[proc]; 2998 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 2999 k++; 3000 } 3001 3002 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 3003 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 3004 ierr = PetscFree(status);CHKERRQ(ierr); 3005 3006 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3007 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3008 3009 /* insert mat values of mpimat */ 3010 /*----------------------------*/ 3011 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 3012 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3013 nextrow = buf_ri_k + merge->nrecv; 3014 nextai = nextrow + merge->nrecv; 3015 3016 for (k=0; k<merge->nrecv; k++){ 3017 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3018 nrows = *(buf_ri_k[k]); 3019 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 3020 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3021 } 3022 3023 /* set values of ba */ 3024 ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); 3025 for (i=0; i<m; i++) { 3026 arow = owners[rank] + i; 3027 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 3028 bnzi = bi[i+1] - bi[i]; 3029 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 
3030 3031 /* add local non-zero vals of this proc's seqmat into ba */ 3032 anzi = ai[arow+1] - ai[arow]; 3033 aj = a->j + ai[arow]; 3034 aa = a->a + ai[arow]; 3035 nextaj = 0; 3036 for (j=0; nextaj<anzi; j++){ 3037 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3038 ba_i[j] += aa[nextaj++]; 3039 } 3040 } 3041 3042 /* add received vals into ba */ 3043 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3044 /* i-th row */ 3045 if (i == *nextrow[k]) { 3046 anzi = *(nextai[k]+1) - *nextai[k]; 3047 aj = buf_rj[k] + *(nextai[k]); 3048 aa = abuf_r[k] + *(nextai[k]); 3049 nextaj = 0; 3050 for (j=0; nextaj<anzi; j++){ 3051 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3052 ba_i[j] += aa[nextaj++]; 3053 } 3054 } 3055 nextrow[k]++; nextai[k]++; 3056 } 3057 } 3058 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3059 } 3060 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3061 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3062 3063 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3064 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3065 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3066 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3067 PetscFunctionReturn(0); 3068 } 3069 3070 static PetscEvent logkey_seqstompisym = 0; 3071 #undef __FUNCT__ 3072 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 3073 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3074 { 3075 PetscErrorCode ierr; 3076 Mat B_mpi; 3077 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3078 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3079 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3080 PetscInt M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j; 3081 PetscInt len,proc,*dnz,*onz; 3082 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3083 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3084 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3085 MPI_Status *status; 3086 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3087 PetscBT lnkbt; 3088 Mat_Merge_SeqsToMPI *merge; 3089 PetscObjectContainer container; 3090 3091 PetscFunctionBegin; 3092 if (!logkey_seqstompisym) { 3093 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3094 } 3095 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3096 3097 /* make sure it is a PETSc comm */ 3098 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 3099 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3100 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3101 3102 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3103 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3104 3105 /* determine row ownership */ 3106 /*---------------------------------------------------------*/ 3107 ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr); 3108 if (m == PETSC_DECIDE) { 3109 ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr); 3110 } else { 3111 ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 3112 } 3113 ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr); 3114 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3115 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3116 3117 if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); } 3118 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 3119 3120 /* determine the 
number of messages to send, their lengths */ 3121 /*---------------------------------------------------------*/ 3122 len_s = merge->len_s; 3123 3124 len = 0; /* length of buf_si[] */ 3125 merge->nsend = 0; 3126 for (proc=0; proc<size; proc++){ 3127 len_si[proc] = 0; 3128 if (proc == rank){ 3129 len_s[proc] = 0; 3130 } else { 3131 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3132 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3133 } 3134 if (len_s[proc]) { 3135 merge->nsend++; 3136 nrows = 0; 3137 for (i=owners[proc]; i<owners[proc+1]; i++){ 3138 if (ai[i+1] > ai[i]) nrows++; 3139 } 3140 len_si[proc] = 2*(nrows+1); 3141 len += len_si[proc]; 3142 } 3143 } 3144 3145 /* determine the number and length of messages to receive for ij-structure */ 3146 /*-------------------------------------------------------------------------*/ 3147 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3148 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3149 3150 /* post the Irecv of j-structure */ 3151 /*-------------------------------*/ 3152 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr); 3153 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3154 3155 /* post the Isend of j-structure */ 3156 /*--------------------------------*/ 3157 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3158 sj_waits = si_waits + merge->nsend; 3159 3160 for (proc=0, k=0; proc<size; proc++){ 3161 if (!len_s[proc]) continue; 3162 i = owners[proc]; 3163 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3164 k++; 3165 } 3166 3167 /* receives and sends of j-structure are complete */ 3168 /*------------------------------------------------*/ 3169 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 3170 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 3171 3172 /* send and recv i-structure */ 3173 /*---------------------------*/ 3174 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr); 3175 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3176 3177 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3178 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3179 for (proc=0,k=0; proc<size; proc++){ 3180 if (!len_s[proc]) continue; 3181 /* form outgoing message for i-structure: 3182 buf_si[0]: nrows to be sent 3183 [1:nrows]: row index (global) 3184 [nrows+1:2*nrows+1]: i-structure index 3185 */ 3186 /*-------------------------------------------*/ 3187 nrows = len_si[proc]/2 - 1; 3188 buf_si_i = buf_si + nrows+1; 3189 buf_si[0] = nrows; 3190 buf_si_i[0] = 0; 3191 nrows = 0; 3192 for (i=owners[proc]; i<owners[proc+1]; i++){ 3193 anzi = ai[i+1] - ai[i]; 3194 if (anzi) { 3195 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3196 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3197 nrows++; 3198 } 3199 } 3200 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3201 k++; 3202 buf_si += len_si[proc]; 3203 } 3204 3205 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 3206 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 3207 3208 ierr = 
PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv));CHKERRQ(ierr); 3209 for (i=0; i<merge->nrecv; i++){ 3210 ierr = PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI: recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));CHKERRQ(ierr); 3211 } 3212 3213 ierr = PetscFree(len_si);CHKERRQ(ierr); 3214 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3215 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 3216 ierr = PetscFree(si_waits);CHKERRQ(ierr); 3217 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3218 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3219 ierr = PetscFree(status);CHKERRQ(ierr); 3220 3221 /* compute a local seq matrix in each processor */ 3222 /*----------------------------------------------*/ 3223 /* allocate bi array and free space for accumulating nonzero column info */ 3224 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3225 bi[0] = 0; 3226 3227 /* create and initialize a linked list */ 3228 nlnk = N+1; 3229 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3230 3231 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3232 len = 0; 3233 len = ai[owners[rank+1]] - ai[owners[rank]]; 3234 ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3235 current_space = free_space; 3236 3237 /* determine symbolic info for each local row */ 3238 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3239 nextrow = buf_ri_k + merge->nrecv; 3240 nextai = nextrow + merge->nrecv; 3241 for (k=0; k<merge->nrecv; k++){ 3242 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3243 nrows = *buf_ri_k[k]; 3244 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 3245 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure */ 3246 } 3247 3248 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3249 len = 0; 3250 for (i=0;i<m;i++) { 3251 bnzi = 0; 3252 /* add local non-zero cols of this proc's seqmat into lnk */ 3253 arow = owners[rank] + i; 3254 anzi = ai[arow+1] - ai[arow]; 3255 aj = a->j + ai[arow]; 3256 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3257 bnzi += nlnk; 3258 /* add received col data into lnk */ 3259 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3260 if (i == *nextrow[k]) { /* i-th row */ 3261 anzi = *(nextai[k]+1) - *nextai[k]; 3262 aj = buf_rj[k] + *nextai[k]; 3263 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3264 bnzi += nlnk; 3265 nextrow[k]++; nextai[k]++; 3266 } 3267 } 3268 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3269 3270 /* if free space is not available, make more free space */ 3271 if (current_space->local_remaining<bnzi) { 3272 ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr); 3273 nspacedouble++; 3274 } 3275 /* copy data into free space, then initialize lnk */ 3276 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3277 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3278 3279 current_space->array += bnzi; 3280 current_space->local_used += bnzi; 3281 current_space->local_remaining -= bnzi; 3282 3283 bi[i+1] = bi[i] + bnzi; 3284 } 3285 3286 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3287 3288 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3289 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3290 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3291 3292 /* create symbolic parallel
matrix B_mpi */ 3293 /*---------------------------------------*/ 3294 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 3295 if (n==PETSC_DECIDE) { 3296 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 3297 } else { 3298 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3299 } 3300 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3301 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3302 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3303 3304 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3305 B_mpi->assembled = PETSC_FALSE; 3306 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3307 merge->bi = bi; 3308 merge->bj = bj; 3309 merge->buf_ri = buf_ri; 3310 merge->buf_rj = buf_rj; 3311 merge->coi = PETSC_NULL; 3312 merge->coj = PETSC_NULL; 3313 merge->owners_co = PETSC_NULL; 3314 3315 /* attach the supporting struct to B_mpi for reuse */ 3316 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3317 ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr); 3318 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3319 *mpimat = B_mpi; 3320 3321 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 3322 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3323 PetscFunctionReturn(0); 3324 } 3325 3326 static PetscEvent logkey_seqstompi = 0; 3327 #undef __FUNCT__ 3328 #define __FUNCT__ "MatMerge_SeqsToMPI" 3329 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3330 { 3331 PetscErrorCode ierr; 3332 3333 PetscFunctionBegin; 3334 if (!logkey_seqstompi) { 3335 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3336 } 3337 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3338 if (scall == MAT_INITIAL_MATRIX){ 3339 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3340 } 3341 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3342 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3343 PetscFunctionReturn(0); 3344 } 3345 static PetscEvent logkey_getlocalmat = 0; 3346 #undef __FUNCT__ 3347 #define __FUNCT__ "MatGetLocalMat" 3348 /*@C 3349 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3350 3351 Not Collective 3352 3353 Input Parameters: 3354 + A - the matrix 3355 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3356 3357 Output Parameter: 3358 . 
A_loc - the local sequential matrix generated 3359 3360 Level: developer 3361 3362 @*/ 3363 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3364 { 3365 PetscErrorCode ierr; 3366 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3367 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3368 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3369 PetscScalar *aa=a->a,*ba=b->a,*ca; 3370 PetscInt am=A->m,i,j,k,cstart=mpimat->cstart; 3371 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3372 3373 PetscFunctionBegin; 3374 if (!logkey_getlocalmat) { 3375 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3376 } 3377 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3378 if (scall == MAT_INITIAL_MATRIX){ 3379 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 3380 ci[0] = 0; 3381 for (i=0; i<am; i++){ 3382 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3383 } 3384 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3385 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 3386 k = 0; 3387 for (i=0; i<am; i++) { 3388 ncols_o = bi[i+1] - bi[i]; 3389 ncols_d = ai[i+1] - ai[i]; 3390 /* off-diagonal portion of A */ 3391 for (jo=0; jo<ncols_o; jo++) { 3392 col = cmap[*bj]; 3393 if (col >= cstart) break; 3394 cj[k] = col; bj++; 3395 ca[k++] = *ba++; 3396 } 3397 /* diagonal portion of A */ 3398 for (j=0; j<ncols_d; j++) { 3399 cj[k] = cstart + *aj++; 3400 ca[k++] = *aa++; 3401 } 3402 /* off-diagonal portion of A */ 3403 for (j=jo; j<ncols_o; j++) { 3404 cj[k] = cmap[*bj++]; 3405 ca[k++] = *ba++; 3406 } 3407 } 3408 /* put together the new matrix */ 3409 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 3410 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3411 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3412 mat = (Mat_SeqAIJ*)(*A_loc)->data; 3413 mat->freedata = PETSC_TRUE; 3414 mat->nonew = 0; 3415 } else if (scall == MAT_REUSE_MATRIX){ 3416 mat=(Mat_SeqAIJ*)(*A_loc)->data; 3417 ci = mat->i; cj = mat->j; ca = mat->a; 3418 for (i=0; i<am; i++) { 3419 /* off-diagonal portion of A */ 3420 ncols_o = bi[i+1] - bi[i]; 3421 for (jo=0; jo<ncols_o; jo++) { 3422 col = cmap[*bj]; 3423 if (col >= cstart) break; 3424 *ca++ = *ba++; bj++; 3425 } 3426 /* diagonal portion of A */ 3427 ncols_d = ai[i+1] - ai[i]; 3428 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 3429 /* off-diagonal portion of A */ 3430 for (j=jo; j<ncols_o; j++) { 3431 *ca++ = *ba++; bj++; 3432 } 3433 } 3434 } else { 3435 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3436 } 3437 3438 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3439 PetscFunctionReturn(0); 3440 } 3441 3442 static PetscEvent logkey_getlocalmatcondensed = 0; 3443 #undef __FUNCT__ 3444 #define __FUNCT__ "MatGetLocalMatCondensed" 3445 /*@C 3446 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 3447 3448 Not Collective 3449 3450 Input Parameters: 3451 + A - the matrix 3452 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3453 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 3454 3455 Output Parameter: 3456 . 
A_loc - the local sequential matrix generated 3457 3458 Level: developer 3459 3460 @*/ 3461 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 3462 { 3463 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3464 PetscErrorCode ierr; 3465 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 3466 IS isrowa,iscola; 3467 Mat *aloc; 3468 3469 PetscFunctionBegin; 3470 if (!logkey_getlocalmatcondensed) { 3471 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE); 3472 } 3473 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3474 if (!row){ 3475 start = a->rstart; end = a->rend; 3476 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 3477 } else { 3478 isrowa = *row; 3479 } 3480 if (!col){ 3481 start = a->cstart; 3482 cmap = a->garray; 3483 nzA = a->A->n; 3484 nzB = a->B->n; 3485 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3486 ncols = 0; 3487 for (i=0; i<nzB; i++) { 3488 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3489 else break; 3490 } 3491 imark = i; 3492 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 3493 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 3494 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 3495 ierr = PetscFree(idx);CHKERRQ(ierr); 3496 } else { 3497 iscola = *col; 3498 } 3499 if (scall != MAT_INITIAL_MATRIX){ 3500 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 3501 aloc[0] = *A_loc; 3502 } 3503 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 3504 *A_loc = aloc[0]; 3505 ierr = PetscFree(aloc);CHKERRQ(ierr); 3506 if (!row){ 3507 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 3508 } 3509 if (!col){ 3510 ierr = ISDestroy(iscola);CHKERRQ(ierr); 3511 } 3512 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3513 PetscFunctionReturn(0); 3514 } 3515 3516 static PetscEvent logkey_GetBrowsOfAcols = 0; 3517 #undef __FUNCT__ 3518 #define __FUNCT__ "MatGetBrowsOfAcols" 3519 /*@C 3520 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A 3521 3522 Collective on Mat 3523 3524 Input Parameters: 3525 + A,B - the matrices in mpiaij format 3526 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3527 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 3528 3529 Output Parameter: 3530 + rowb, colb - index sets of rows and columns of B to extract 3531 .
brstart - row index of B_seq from which the next B->m rows are taken from B's local rows 3532 - B_seq - the sequential matrix generated 3533 3534 Level: developer 3535 3536 @*/ 3537 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 3538 { 3539 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3540 PetscErrorCode ierr; 3541 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 3542 IS isrowb,iscolb; 3543 Mat *bseq; 3544 3545 PetscFunctionBegin; 3546 if (a->cstart != b->rstart || a->cend != b->rend){ 3547 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend); 3548 } 3549 if (!logkey_GetBrowsOfAcols) { 3550 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 3551 } 3552 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3553 3554 if (scall == MAT_INITIAL_MATRIX){ 3555 start = a->cstart; 3556 cmap = a->garray; 3557 nzA = a->A->n; 3558 nzB = a->B->n; 3559 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3560 ncols = 0; 3561 for (i=0; i<nzB; i++) { /* row < local row index */ 3562 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3563 else break; 3564 } 3565 imark = i; 3566 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 3567 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 3568 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 3569 ierr = PetscFree(idx);CHKERRQ(ierr); 3570 *brstart = imark; 3571 ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr); 3572 } else { 3573 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 3574 isrowb = *rowb; iscolb = *colb; 3575 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 3576 bseq[0] = *B_seq; 3577 } 3578 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 3579 *B_seq = bseq[0]; 3580 ierr = PetscFree(bseq);CHKERRQ(ierr); 3581 if (!rowb){ 3582 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 3583 } else { 3584 *rowb = isrowb; 3585 } 3586 if (!colb){ 3587 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 3588 } else { 3589 *colb = iscolb; 3590 } 3591 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3592 PetscFunctionReturn(0); 3593 } 3594 3595 static PetscEvent logkey_GetBrowsOfAocols = 0; 3596 #undef __FUNCT__ 3597 #define __FUNCT__ "MatGetBrowsOfAoCols" 3598 /*@C 3599 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns 3600 of the OFF-DIAGONAL portion of local A 3601 3602 Collective on Mat 3603 3604 Input Parameters: 3605 + A,B - the matrices in mpiaij format 3606 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3607 .
startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 3608 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 3609 3610 Output Parameter: 3611 + B_oth - the sequential matrix generated 3612 3613 Level: developer 3614 3615 @*/ 3616 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 3617 { 3618 VecScatter_MPI_General *gen_to,*gen_from; 3619 PetscErrorCode ierr; 3620 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3621 Mat_SeqAIJ *b_oth; 3622 VecScatter ctx=a->Mvctx; 3623 MPI_Comm comm=ctx->comm; 3624 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank; 3625 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj; 3626 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 3627 PetscInt i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 3628 MPI_Request *rwaits,*swaits; 3629 MPI_Status *sstatus,rstatus; 3630 PetscInt *cols; 3631 PetscScalar *vals; 3632 PetscMPIInt j; 3633 3634 PetscFunctionBegin; 3635 if (a->cstart != b->rstart || a->cend != b->rend){ 3636 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend); 3637 } 3638 if (!logkey_GetBrowsOfAocols) { 3639 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE); 3640 } 3641 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3642 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3643 3644 gen_to = (VecScatter_MPI_General*)ctx->todata; 3645 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 3646 rvalues = gen_from->values; /* holds the length of sending row */ 3647 svalues = gen_to->values; /* holds the length of receiving row */ 3648 nrecvs = gen_from->n; 3649 nsends = gen_to->n; 3650 rwaits = gen_from->requests; 3651 swaits = gen_to->requests; 3652 srow = gen_to->indices; /* local row index to be sent */ 3653 rstarts = gen_from->starts; 3654 sstarts = gen_to->starts; 3655 rprocs = gen_from->procs; 3656 sprocs = gen_to->procs; 3657 sstatus = gen_to->sstatus; 3658 3659 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 3660 if (scall == MAT_INITIAL_MATRIX){ 3661 /* i-array */ 3662 /*---------*/ 3663 /* post receives */ 3664 for (i=0; i<nrecvs; i++){ 3665 rowlen = (PetscInt*)rvalues + rstarts[i]; 3666 nrows = rstarts[i+1]-rstarts[i]; 3667 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3668 } 3669 3670 /* pack the outgoing message */ 3671 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 3672 rstartsj = sstartsj + nsends +1; 3673 sstartsj[0] = 0; rstartsj[0] = 0; 3674 len = 0; /* total length of j or a array to be sent */ 3675 k = 0; 3676 for (i=0; i<nsends; i++){ 3677 rowlen = (PetscInt*)svalues + sstarts[i]; 3678 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3679 for (j=0; j<nrows; j++) { 3680 row = srow[k] + b->rowners[rank]; /* global row idx */ 3681 ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 3682 len += rowlen[j]; 3683 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 3684 k++; 3685 } 3686 ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3687 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 3688 } 3689 /* recvs and sends of i-array are completed */ 3690 i = nrecvs; 3691 while (i--) { 3692 ierr = 
MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3693 } 3694 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3695 /* allocate buffers for sending j and a arrays */ 3696 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 3697 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 3698 3699 /* create i-array of B_oth */ 3700 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 3701 b_othi[0] = 0; 3702 len = 0; /* total length of j or a array to be received */ 3703 k = 0; 3704 for (i=0; i<nrecvs; i++){ 3705 rowlen = (PetscInt*)rvalues + rstarts[i]; 3706 nrows = rstarts[i+1]-rstarts[i]; 3707 for (j=0; j<nrows; j++) { 3708 b_othi[k+1] = b_othi[k] + rowlen[j]; 3709 len += rowlen[j]; k++; 3710 } 3711 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 3712 } 3713 3714 /* allocate space for j and a arrays of B_oth */ 3715 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 3716 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr); 3717 3718 /* j-array */ 3719 /*---------*/ 3720 /* post receives of j-array */ 3721 for (i=0; i<nrecvs; i++){ 3722 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3723 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3724 } 3725 k = 0; 3726 for (i=0; i<nsends; i++){ 3727 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3728 bufJ = bufj+sstartsj[i]; 3729 for (j=0; j<nrows; j++) { 3730 row = srow[k++] + b->rowners[rank]; /* global row idx */ 3731 ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3732 for (l=0; l<ncols; l++){ 3733 *bufJ++ = cols[l]; 3734 } 3735 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3736 } 3737 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3738 } 3739 3740 /* recvs and sends of j-array are completed */ 3741 i = nrecvs; 3742 while (i--) { 3743 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3744 } 3745 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3746 } else if (scall == MAT_REUSE_MATRIX){ 3747 sstartsj = *startsj; 3748 rstartsj = sstartsj + nsends +1; 3749 bufa = *bufa_ptr; 3750 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 3751 b_otha = b_oth->a; 3752 } else { 3753 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Invalid MatReuse: must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX"); 3754 } 3755 3756 /* a-array */ 3757 /*---------*/ 3758 /* post receives of a-array */ 3759 for (i=0; i<nrecvs; i++){ 3760 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3761 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3762 } 3763 k = 0; 3764 for (i=0; i<nsends; i++){ 3765 nrows = sstarts[i+1]-sstarts[i]; 3766 bufA = bufa+sstartsj[i]; 3767 for (j=0; j<nrows; j++) { 3768 row = srow[k++] + b->rowners[rank]; /* global row idx */ 3769 ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3770 for (l=0; l<ncols; l++){ 3771 *bufA++ = vals[l]; 3772 } 3773 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3774 3775 } 3776 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3777 } 3778 /* recvs and sends of a-array are completed */ 3779 i = nrecvs; 3780 while (i--) { 3781 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3782 } 3783 if (nsends) {ierr =
MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 3784 3785 if (scall == MAT_INITIAL_MATRIX){ 3786 /* put together the new matrix */ 3787 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 3788 3789 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3790 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3791 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 3792 b_oth->freedata = PETSC_TRUE; 3793 b_oth->nonew = 0; 3794 3795 ierr = PetscFree(bufj);CHKERRQ(ierr); 3796 if (!startsj || !bufa_ptr){ 3797 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 3798 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 3799 } else { 3800 *startsj = sstartsj; 3801 *bufa_ptr = bufa; 3802 } 3803 } 3804 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3805 3806 PetscFunctionReturn(0); 3807 } 3808 3809 #undef __FUNCT__ 3810 #define __FUNCT__ "MatGetCommunicationStructs" 3811 /*@C 3812 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 3813 3814 Not Collective 3815 3816 Input Parameters: 3817 . A - The matrix in mpiaij format 3818 3819 Output Parameter: 3820 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 3821 . colmap - A map from global column index to local index into lvec 3822 - multScatter - A scatter from the argument of a matrix-vector product to lvec 3823 3824 Level: developer 3825 3826 @*/ 3827 #if defined (PETSC_USE_CTABLE) 3828 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 3829 #else 3830 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 3831 #endif 3832 { 3833 Mat_MPIAIJ *a; 3834 3835 PetscFunctionBegin; 3836 PetscValidHeaderSpecific(A, MAT_COOKIE, 1); 3837 PetscValidPointer(lvec, 2) 3838 PetscValidPointer(colmap, 3) 3839 PetscValidPointer(multScatter, 4) 3840 a = (Mat_MPIAIJ *) A->data; 3841 if (lvec) *lvec = a->lvec; 3842 if (colmap) *colmap = a->colmap; 3843 if (multScatter) *multScatter = a->Mvctx; 3844 PetscFunctionReturn(0); 3845 } 3846 3847 /*MC 3848 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 3849 3850 Options Database Keys: 3851 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 3852 3853 Level: beginner 3854 3855 .seealso: MatCreateMPIAIJ 3856 M*/ 3857 3858 EXTERN_C_BEGIN 3859 #undef __FUNCT__ 3860 #define __FUNCT__ "MatCreate_MPIAIJ" 3861 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 3862 { 3863 Mat_MPIAIJ *b; 3864 PetscErrorCode ierr; 3865 PetscInt i; 3866 PetscMPIInt size; 3867 3868 PetscFunctionBegin; 3869 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 3870 3871 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 3872 B->data = (void*)b; 3873 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3874 B->factor = 0; 3875 B->bs = 1; 3876 B->assembled = PETSC_FALSE; 3877 B->mapping = 0; 3878 3879 B->insertmode = NOT_SET_VALUES; 3880 b->size = size; 3881 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 3882 3883 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 3884 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 3885 3886 /* the information in the maps duplicates the information computed below, eventually 3887 we should remove the duplicate information that is not contained in the maps */ 3888 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 3889 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 3890 3891 /* build local table of row and column ownerships */ 3892 ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 3893 ierr = PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 3894 b->cowners = b->rowners + b->size + 2; 3895 ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3896 b->rowners[0] = 0; 3897 for (i=2; i<=b->size; i++) { 3898 b->rowners[i] += b->rowners[i-1]; 3899 } 3900 b->rstart = b->rowners[b->rank]; 3901 b->rend = b->rowners[b->rank+1]; 3902 ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3903 b->cowners[0] = 0; 3904 for (i=2; i<=b->size; i++) { 3905 b->cowners[i] += b->cowners[i-1]; 3906 } 3907 b->cstart = b->cowners[b->rank]; 3908 b->cend = b->cowners[b->rank+1]; 3909 3910 /* build cache for off array entries formed */ 3911 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 3912 b->donotstash = PETSC_FALSE; 3913 b->colmap = 0; 3914 b->garray = 0; 3915 b->roworiented = PETSC_TRUE; 3916 3917 /* stuff used for matrix vector multiply */ 3918 b->lvec = PETSC_NULL; 3919 b->Mvctx = PETSC_NULL; 3920 3921 /* stuff for MatGetRow() */ 3922 b->rowindices = 0; 3923 b->rowvalues = 0; 3924 b->getrowactive = PETSC_FALSE; 3925 3926 /* Explicitly create 2 MATSEQAIJ matrices. 
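   b->A holds the diagonal block (the rows and columns owned by this process) and b->B holds the
   off-diagonal block; at assembly time the columns of b->B are compressed and their global
   column numbers are recorded in b->garray.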
*/ 3927 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3928 ierr = MatSetSizes(b->A,B->m,B->n,B->m,B->n);CHKERRQ(ierr); 3929 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3930 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3931 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3932 ierr = MatSetSizes(b->B,B->m,B->N,B->m,B->N);CHKERRQ(ierr); 3933 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3934 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3935 3936 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3937 "MatStoreValues_MPIAIJ", 3938 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 3939 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3940 "MatRetrieveValues_MPIAIJ", 3941 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 3942 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3943 "MatGetDiagonalBlock_MPIAIJ", 3944 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 3945 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3946 "MatIsTranspose_MPIAIJ", 3947 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 3948 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 3949 "MatMPIAIJSetPreallocation_MPIAIJ", 3950 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 3951 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 3952 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 3953 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 3954 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3955 "MatDiagonalScaleLocal_MPIAIJ", 3956 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 3957 PetscFunctionReturn(0); 3958 } 3959 EXTERN_C_END 3960
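/*
   Usage sketch (editor's note, not part of the original source): the merge routines above
   combine per-process sequential AIJ matrices into a single parallel MPIAIJ matrix.  The
   fragment below is a minimal illustration only; it assumes each process has filled a SeqAIJ
   matrix of the full global size M x N with just its own contributions, where M and N are
   placeholders for the caller's global dimensions and FillLocalPart() is a hypothetical
   helper standing in for that assembly code.

      Mat            seqmat,mpimat;
      PetscErrorCode ierr;

      ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,M,N,0,PETSC_NULL,&seqmat);CHKERRQ(ierr);
      ierr = FillLocalPart(seqmat);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(seqmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(seqmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,
                                MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);

   The first call with MAT_INITIAL_MATRIX performs both the symbolic and the numeric merge;
   entries contributed by several processes for the same global location are summed.  When the
   nonzero pattern is unchanged, a later call with MAT_REUSE_MATRIX repeats only the numeric
   phase on the same mpimat.
*/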