#define PETSCMAT_DLL

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/inline/spops.h"

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable
  at a slightly higher hash table cost; without it, it is not scalable
  (each processor has an order N integer array, but access is fast).
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (lastcol1 > col) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    if (nrow1 >= rmax1) { \
      /* there is no extra room in row, therefore enlarge */ \
      PetscInt    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(am+1)*sizeof(PetscInt); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (PetscInt*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow1)*sizeof(PetscInt));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow1); \
      ierr = PetscMemcpy(new_j+ai[row]+nrow1+CHUNKSIZE,aj+ai[row]+nrow1,len*sizeof(PetscInt));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow1)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+nrow1+CHUNKSIZE,aa+ai[row]+nrow1,len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp1   = aj + ai[row]; ap1 = aa + ai[row]; \
      rmax1 = aimax[row] = aimax[row] + CHUNKSIZE; \
      ierr =
PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar)));CHKERRQ(ierr); \ 91 a->maxnz += CHUNKSIZE; \ 92 a->reallocs++; \ 93 } \ 94 N = nrow1++ - 1; a->nz++; \ 95 /* shift up all the later entries in this row */ \ 96 for (ii=N; ii>=_i; ii--) { \ 97 rp1[ii+1] = rp1[ii]; \ 98 ap1[ii+1] = ap1[ii]; \ 99 } \ 100 rp1[_i] = col; \ 101 ap1[_i] = value; \ 102 a_noinsert: ; \ 103 ailen[row] = nrow1; \ 104 } 105 106 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 107 { \ 108 if (lastcol2 > col) low2 = 0; else high2 = nrow2; \ 109 lastcol2 = col;\ 110 while (high2-low2 > 5) { \ 111 t = (low2+high2)/2; \ 112 if (rp2[t] > col) high2 = t; \ 113 else low2 = t; \ 114 } \ 115 for (_i=low2; _i<high2; _i++) { \ 116 if (rp2[_i] > col) break; \ 117 if (rp2[_i] == col) { \ 118 if (addv == ADD_VALUES) ap2[_i] += value; \ 119 else ap2[_i] = value; \ 120 goto b_noinsert; \ 121 } \ 122 } \ 123 if (value == 0.0 && ignorezeroentries) goto b_noinsert; \ 124 if (nonew == 1) goto b_noinsert; \ 125 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 126 if (nrow2 >= rmax2) { \ 127 /* there is no extra room in row, therefore enlarge */ \ 128 PetscInt new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 129 PetscScalar *new_a; \ 130 \ 131 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \ 132 \ 133 /* malloc new storage space */ \ 134 len = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(bm+1)*sizeof(PetscInt); \ 135 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 136 new_j = (PetscInt*)(new_a + new_nz); \ 137 new_i = new_j + new_nz; \ 138 \ 139 /* copy over old data into new slots */ \ 140 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 141 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 142 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow2)*sizeof(PetscInt));CHKERRQ(ierr); \ 143 len = (new_nz - CHUNKSIZE - bi[row] - nrow2); \ 144 ierr = PetscMemcpy(new_j+bi[row]+nrow2+CHUNKSIZE,bj+bi[row]+nrow2,len*sizeof(PetscInt));CHKERRQ(ierr); \ 145 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow2)*sizeof(PetscScalar));CHKERRQ(ierr); \ 146 ierr = PetscMemcpy(new_a+bi[row]+nrow2+CHUNKSIZE,ba+bi[row]+nrow2,len*sizeof(PetscScalar));CHKERRQ(ierr); \ 147 /* free up old matrix storage */ \ 148 \ 149 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 150 if (!b->singlemalloc) { \ 151 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 152 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 153 } \ 154 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 155 b->singlemalloc = PETSC_TRUE; \ 156 \ 157 rp2 = bj + bi[row]; ap2 = ba + bi[row]; \ 158 rmax2 = bimax[row] = bimax[row] + CHUNKSIZE; \ 159 ierr = PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar)));CHKERRQ(ierr); \ 160 b->maxnz += CHUNKSIZE; \ 161 b->reallocs++; \ 162 } \ 163 N = nrow2++ - 1; b->nz++; \ 164 /* shift up all the later entries in this row */ \ 165 for (ii=N; ii>=_i; ii--) { \ 166 rp2[ii+1] = rp2[ii]; \ 167 ap2[ii+1] = ap2[ii]; \ 168 } \ 169 rp2[_i] = col; \ 170 ap2[_i] = value; \ 171 b_noinsert: ; \ 172 bilen[row] = nrow2; \ 173 } 174 175 #undef __FUNCT__ 176 #define __FUNCT__ "MatSetValues_MPIAIJ" 177 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 178 { 179 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 180 PetscScalar value; 181 PetscErrorCode ierr; 182 PetscInt i,j,rstart = aij->rstart,rend = aij->rend; 183 PetscInt cstart = 
aij->cstart,cend = aij->cend,row,col; 184 PetscTruth roworiented = aij->roworiented; 185 186 /* Some Variables required in the macro */ 187 Mat A = aij->A; 188 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 189 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 190 PetscScalar *aa = a->a; 191 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 192 Mat B = aij->B; 193 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 194 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 195 PetscScalar *ba = b->a; 196 197 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 198 PetscInt nonew = a->nonew; 199 PetscScalar *ap1,*ap2; 200 201 PetscFunctionBegin; 202 for (i=0; i<m; i++) { 203 if (im[i] < 0) continue; 204 #if defined(PETSC_USE_DEBUG) 205 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 206 #endif 207 if (im[i] >= rstart && im[i] < rend) { 208 row = im[i] - rstart; 209 lastcol1 = -1; 210 rp1 = aj + ai[row]; 211 ap1 = aa + ai[row]; 212 rmax1 = aimax[row]; 213 nrow1 = ailen[row]; 214 low1 = 0; 215 high1 = nrow1; 216 lastcol2 = -1; 217 rp2 = bj + bi[row]; 218 ap2 = ba + bi[row]; 219 rmax2 = bimax[row]; 220 nrow2 = bilen[row]; 221 low2 = 0; 222 high2 = nrow2; 223 224 for (j=0; j<n; j++) { 225 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 226 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 227 if (in[j] >= cstart && in[j] < cend){ 228 col = in[j] - cstart; 229 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 230 } else if (in[j] < 0) continue; 231 #if defined(PETSC_USE_DEBUG) 232 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);} 233 #endif 234 else { 235 if (mat->was_assembled) { 236 if (!aij->colmap) { 237 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 238 } 239 #if defined (PETSC_USE_CTABLE) 240 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 241 col--; 242 #else 243 col = aij->colmap[in[j]] - 1; 244 #endif 245 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 246 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 247 col = in[j]; 248 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 249 B = aij->B; 250 b = (Mat_SeqAIJ*)B->data; 251 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 252 rp2 = bj + bi[row]; 253 ap2 = ba + bi[row]; 254 rmax2 = bimax[row]; 255 nrow2 = bilen[row]; 256 low2 = 0; 257 high2 = nrow2; 258 ba = b->a; 259 } 260 } else col = in[j]; 261 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 262 } 263 } 264 } else { 265 if (!aij->donotstash) { 266 if (roworiented) { 267 if (ignorezeroentries && v[i*n] == 0.0) continue; 268 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 269 } else { 270 if (ignorezeroentries && v[i] == 0.0) continue; 271 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 272 } 273 } 274 } 275 } 276 PetscFunctionReturn(0); 277 } 278 279 #undef __FUNCT__ 280 #define __FUNCT__ "MatGetValues_MPIAIJ" 281 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 282 { 283 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 284 PetscErrorCode ierr; 285 PetscInt i,j,rstart = aij->rstart,rend = aij->rend; 286 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 287 288 PetscFunctionBegin; 289 for (i=0; i<m; i++) { 290 if 
(idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 291 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1); 292 if (idxm[i] >= rstart && idxm[i] < rend) { 293 row = idxm[i] - rstart; 294 for (j=0; j<n; j++) { 295 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 296 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1); 297 if (idxn[j] >= cstart && idxn[j] < cend){ 298 col = idxn[j] - cstart; 299 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 300 } else { 301 if (!aij->colmap) { 302 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 303 } 304 #if defined (PETSC_USE_CTABLE) 305 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 306 col --; 307 #else 308 col = aij->colmap[idxn[j]] - 1; 309 #endif 310 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 311 else { 312 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 313 } 314 } 315 } 316 } else { 317 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 318 } 319 } 320 PetscFunctionReturn(0); 321 } 322 323 #undef __FUNCT__ 324 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 325 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 326 { 327 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 328 PetscErrorCode ierr; 329 PetscInt nstash,reallocs; 330 InsertMode addv; 331 332 PetscFunctionBegin; 333 if (aij->donotstash) { 334 PetscFunctionReturn(0); 335 } 336 337 /* make sure all processors are either in INSERTMODE or ADDMODE */ 338 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 339 if (addv == (ADD_VALUES|INSERT_VALUES)) { 340 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 341 } 342 mat->insertmode = addv; /* in case this processor had no cache */ 343 344 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 345 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 346 ierr = PetscLogInfo((aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr); 347 PetscFunctionReturn(0); 348 } 349 350 #undef __FUNCT__ 351 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 352 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 353 { 354 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 355 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 356 PetscErrorCode ierr; 357 PetscMPIInt n; 358 PetscInt i,j,rstart,ncols,flg; 359 PetscInt *row,*col,other_disassembled; 360 PetscScalar *val; 361 InsertMode addv = mat->insertmode; 362 363 PetscFunctionBegin; 364 if (!aij->donotstash) { 365 while (1) { 366 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 367 if (!flg) break; 368 369 for (i=0; i<n;) { 370 /* Now identify the consecutive vals belonging to the same row */ 371 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 372 if (j < n) ncols = j-i; 373 else ncols = n-i; 374 /* Now assemble all these values with a single function call */ 375 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 376 i = j; 377 } 378 } 379 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 380 } 381 a->compressedrow.use = PETSC_FALSE; 382 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 383 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 384 385 /* determine if any 
processor has disassembled, if so we must 386 also disassemble ourselfs, in order that we may reassemble. */ 387 /* 388 if nonzero structure of submatrix B cannot change then we know that 389 no processor disassembled thus we can skip this stuff 390 */ 391 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 392 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 393 if (mat->was_assembled && !other_disassembled) { 394 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 395 } 396 } 397 /* reaccess the b because aij->B was changed in MatSetValues() or DisAssemble() */ 398 b = (Mat_SeqAIJ *)aij->B->data; 399 400 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 401 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 402 } 403 ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr); 404 b->compressedrow.use = PETSC_TRUE; 405 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 406 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 407 408 if (aij->rowvalues) { 409 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 410 aij->rowvalues = 0; 411 } 412 413 /* used by MatAXPY() */ 414 a->xtoy = 0; b->xtoy = 0; 415 a->XtoY = 0; b->XtoY = 0; 416 417 PetscFunctionReturn(0); 418 } 419 420 #undef __FUNCT__ 421 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 422 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 423 { 424 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 425 PetscErrorCode ierr; 426 427 PetscFunctionBegin; 428 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 429 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 430 PetscFunctionReturn(0); 431 } 432 433 #undef __FUNCT__ 434 #define __FUNCT__ "MatZeroRows_MPIAIJ" 435 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag) 436 { 437 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 438 PetscErrorCode ierr; 439 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1; 440 PetscInt i,N,*rows,*owners = l->rowners; 441 PetscInt *nprocs,j,idx,nsends,row; 442 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 443 PetscInt *rvalues,count,base,slen,*source; 444 PetscInt *lens,*lrows,*values,rstart=l->rstart; 445 MPI_Comm comm = A->comm; 446 MPI_Request *send_waits,*recv_waits; 447 MPI_Status recv_status,*send_status; 448 IS istmp; 449 #if defined(PETSC_DEBUG) 450 PetscTruth found = PETSC_FALSE; 451 #endif 452 453 PetscFunctionBegin; 454 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 455 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 456 457 /* first count number of contributors to each processor */ 458 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 459 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 460 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 461 j = 0; 462 for (i=0; i<N; i++) { 463 if (lastidx > (idx = rows[i])) j = 0; 464 lastidx = idx; 465 for (; j<size; j++) { 466 if (idx >= owners[j] && idx < owners[j+1]) { 467 nprocs[2*j]++; 468 nprocs[2*j+1] = 1; 469 owner[i] = j; 470 #if defined(PETSC_DEBUG) 471 found = PETSC_TRUE; 472 #endif 473 break; 474 } 475 } 476 #if defined(PETSC_DEBUG) 477 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 478 found = PETSC_FALSE; 479 #endif 480 } 481 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 482 483 /* inform other processors of number of messages and max length*/ 484 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 485 486 /* post receives: */ 487 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 488 ierr = 
PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(A,istmp);CHKERRQ(ierr);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     allocating new memory, causing lots of mallocs and slowing things down.
557 558 Contributed by: Mathew Knepley 559 */ 560 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 561 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 562 if (diag && (l->A->M == l->A->N)) { 563 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 564 } else if (diag) { 565 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 566 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 567 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 568 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 569 } 570 for (i = 0; i < slen; i++) { 571 row = lrows[i] + rstart; 572 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 573 } 574 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 575 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 576 } else { 577 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 578 } 579 ierr = ISDestroy(istmp);CHKERRQ(ierr); 580 ierr = PetscFree(lrows);CHKERRQ(ierr); 581 582 /* wait on sends */ 583 if (nsends) { 584 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 585 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 586 ierr = PetscFree(send_status);CHKERRQ(ierr); 587 } 588 ierr = PetscFree(send_waits);CHKERRQ(ierr); 589 ierr = PetscFree(svalues);CHKERRQ(ierr); 590 591 PetscFunctionReturn(0); 592 } 593 594 #undef __FUNCT__ 595 #define __FUNCT__ "MatMult_MPIAIJ" 596 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 597 { 598 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 599 PetscErrorCode ierr; 600 PetscInt nt; 601 602 PetscFunctionBegin; 603 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 604 if (nt != A->n) { 605 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt); 606 } 607 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 608 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 609 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 610 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 611 PetscFunctionReturn(0); 612 } 613 614 #undef __FUNCT__ 615 #define __FUNCT__ "MatMultAdd_MPIAIJ" 616 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 617 { 618 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 619 PetscErrorCode ierr; 620 621 PetscFunctionBegin; 622 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 623 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 624 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 625 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 626 PetscFunctionReturn(0); 627 } 628 629 #undef __FUNCT__ 630 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 631 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 632 { 633 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 634 PetscErrorCode ierr; 635 PetscTruth merged; 636 637 PetscFunctionBegin; 638 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 639 /* do nondiagonal part */ 640 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 641 if (!merged) { 642 /* send it on its way */ 643 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 644 /* do local part */ 645 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 646 /* receive remote parts: note this assumes the values are not actually */ 647 /* added in yy until the next line, */ 648 ierr = 
VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if
(a->rstart != a->cstart || a->rend != a->cend) { 738 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 739 } 740 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 741 PetscFunctionReturn(0); 742 } 743 744 #undef __FUNCT__ 745 #define __FUNCT__ "MatScale_MPIAIJ" 746 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A) 747 { 748 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 749 PetscErrorCode ierr; 750 751 PetscFunctionBegin; 752 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 753 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 754 PetscFunctionReturn(0); 755 } 756 757 #undef __FUNCT__ 758 #define __FUNCT__ "MatDestroy_MPIAIJ" 759 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 760 { 761 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 762 PetscErrorCode ierr; 763 764 PetscFunctionBegin; 765 #if defined(PETSC_USE_LOG) 766 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N); 767 #endif 768 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 769 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 770 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 771 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 772 #if defined (PETSC_USE_CTABLE) 773 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 774 #else 775 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 776 #endif 777 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 778 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 779 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 780 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 781 ierr = PetscFree(aij);CHKERRQ(ierr); 782 783 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 784 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 785 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 786 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 787 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 788 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 789 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 790 PetscFunctionReturn(0); 791 } 792 793 #undef __FUNCT__ 794 #define __FUNCT__ "MatView_MPIAIJ_Binary" 795 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 796 { 797 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 798 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 799 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 800 PetscErrorCode ierr; 801 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 802 int fd; 803 PetscInt nz,header[4],*row_lengths,*range,rlen,i; 804 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 805 PetscScalar *column_values; 806 807 PetscFunctionBegin; 808 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 809 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 810 nz = A->nz + B->nz; 811 if (!rank) { 812 header[0] = MAT_FILE_COOKIE; 813 header[1] = mat->M; 814 header[2] = mat->N; 815 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 816 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 817 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 818 /* get largest number of rows any processor has */ 819 rlen = 
mat->m;
    ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr =
MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 909 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 910 } 911 } else { 912 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 913 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 914 } 915 ierr = PetscFree(column_values);CHKERRQ(ierr); 916 PetscFunctionReturn(0); 917 } 918 919 #undef __FUNCT__ 920 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 921 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 922 { 923 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 924 PetscErrorCode ierr; 925 PetscMPIInt rank = aij->rank,size = aij->size; 926 PetscTruth isdraw,iascii,isbinary; 927 PetscViewer sviewer; 928 PetscViewerFormat format; 929 930 PetscFunctionBegin; 931 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 932 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 933 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 934 if (iascii) { 935 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 936 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 937 MatInfo info; 938 PetscTruth inodes; 939 940 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 941 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 942 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 943 if (!inodes) { 944 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 945 rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 946 } else { 947 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 948 rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 949 } 950 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 951 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 952 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 953 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 954 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 955 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 956 PetscFunctionReturn(0); 957 } else if (format == PETSC_VIEWER_ASCII_INFO) { 958 PetscInt inodecount,inodelimit,*inodes; 959 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 960 if (inodes) { 961 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 962 } else { 963 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 964 } 965 PetscFunctionReturn(0); 966 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 967 PetscFunctionReturn(0); 968 } 969 } else if (isbinary) { 970 if (size == 1) { 971 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 972 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 973 } else { 974 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 975 } 976 PetscFunctionReturn(0); 977 } else if (isdraw) { 978 PetscDraw draw; 979 PetscTruth isnull; 980 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 981 
ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 982 } 983 984 if (size == 1) { 985 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 986 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 987 } else { 988 /* assemble the entire matrix onto first processor. */ 989 Mat A; 990 Mat_SeqAIJ *Aloc; 991 PetscInt M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 992 PetscScalar *a; 993 994 if (!rank) { 995 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 996 } else { 997 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 998 } 999 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1000 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1001 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1002 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1003 1004 /* copy over the A part */ 1005 Aloc = (Mat_SeqAIJ*)aij->A->data; 1006 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1007 row = aij->rstart; 1008 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 1009 for (i=0; i<m; i++) { 1010 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1011 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1012 } 1013 aj = Aloc->j; 1014 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 1015 1016 /* copy over the B part */ 1017 Aloc = (Mat_SeqAIJ*)aij->B->data; 1018 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1019 row = aij->rstart; 1020 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1021 ct = cols; 1022 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1023 for (i=0; i<m; i++) { 1024 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1025 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1026 } 1027 ierr = PetscFree(ct);CHKERRQ(ierr); 1028 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1029 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1030 /* 1031 Everyone has to call to draw the matrix since the graphics waits are 1032 synchronized across all processors that share the PetscDraw object 1033 */ 1034 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1035 if (!rank) { 1036 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 1037 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1038 } 1039 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1040 ierr = MatDestroy(A);CHKERRQ(ierr); 1041 } 1042 PetscFunctionReturn(0); 1043 } 1044 1045 #undef __FUNCT__ 1046 #define __FUNCT__ "MatView_MPIAIJ" 1047 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1048 { 1049 PetscErrorCode ierr; 1050 PetscTruth iascii,isdraw,issocket,isbinary; 1051 1052 PetscFunctionBegin; 1053 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1054 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1055 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1056 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1057 if (iascii || isdraw || isbinary || issocket) { 1058 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1059 } else { 1060 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1061 } 1062 PetscFunctionReturn(0); 1063 } 1064 1065 1066 1067 #undef __FUNCT__ 1068 #define __FUNCT__ "MatRelax_MPIAIJ" 1069 
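/*
   Sketch of what the routine below does (a rough summary of the code that
   follows, not additional functionality): the SOR_LOCAL_* variants relax
   only within this process's diagonal block A, and the coupling to
   off-process unknowns through the B block is folded into the right-hand
   side using the ghost values gathered in mat->lvec. Each global iteration
   is, roughly,

       lvec <- ghost entries of xx                   (forward VecScatter)
       bb1  <- bb - B*lvec                           (VecScale by -1, then MatMultAdd)
       xx   <- lits local SOR sweep(s) of A with bb1

   which amounts to block Jacobi across processes with SOR as the local
   solver; a globally ordered parallel SOR is not supported (see the final
   SETERRQ below).
*/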
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1070 { 1071 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1072 PetscErrorCode ierr; 1073 Vec bb1; 1074 PetscScalar mone=-1.0; 1075 1076 PetscFunctionBegin; 1077 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1078 1079 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1080 1081 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1082 if (flag & SOR_ZERO_INITIAL_GUESS) { 1083 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1084 its--; 1085 } 1086 1087 while (its--) { 1088 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1089 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1090 1091 /* update rhs: bb1 = bb - B*x */ 1092 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1093 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1094 1095 /* local sweep */ 1096 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1097 CHKERRQ(ierr); 1098 } 1099 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1100 if (flag & SOR_ZERO_INITIAL_GUESS) { 1101 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1102 its--; 1103 } 1104 while (its--) { 1105 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1106 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1107 1108 /* update rhs: bb1 = bb - B*x */ 1109 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1110 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1111 1112 /* local sweep */ 1113 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1114 CHKERRQ(ierr); 1115 } 1116 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1117 if (flag & SOR_ZERO_INITIAL_GUESS) { 1118 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1119 its--; 1120 } 1121 while (its--) { 1122 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1123 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1124 1125 /* update rhs: bb1 = bb - B*x */ 1126 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1127 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1128 1129 /* local sweep */ 1130 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1131 CHKERRQ(ierr); 1132 } 1133 } else { 1134 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1135 } 1136 1137 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1138 PetscFunctionReturn(0); 1139 } 1140 1141 #undef __FUNCT__ 1142 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1143 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1144 { 1145 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1146 Mat A = mat->A,B = mat->B; 1147 PetscErrorCode ierr; 1148 PetscReal isend[5],irecv[5]; 1149 1150 PetscFunctionBegin; 1151 info->block_size = 1.0; 1152 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1153 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1154 isend[3] = info->memory; isend[4] = info->mallocs; 1155 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1156 isend[0] += 
info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1157 isend[3] += info->memory; isend[4] += info->mallocs; 1158 if (flag == MAT_LOCAL) { 1159 info->nz_used = isend[0]; 1160 info->nz_allocated = isend[1]; 1161 info->nz_unneeded = isend[2]; 1162 info->memory = isend[3]; 1163 info->mallocs = isend[4]; 1164 } else if (flag == MAT_GLOBAL_MAX) { 1165 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1166 info->nz_used = irecv[0]; 1167 info->nz_allocated = irecv[1]; 1168 info->nz_unneeded = irecv[2]; 1169 info->memory = irecv[3]; 1170 info->mallocs = irecv[4]; 1171 } else if (flag == MAT_GLOBAL_SUM) { 1172 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1173 info->nz_used = irecv[0]; 1174 info->nz_allocated = irecv[1]; 1175 info->nz_unneeded = irecv[2]; 1176 info->memory = irecv[3]; 1177 info->mallocs = irecv[4]; 1178 } 1179 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1180 info->fill_ratio_needed = 0; 1181 info->factor_mallocs = 0; 1182 info->rows_global = (double)matin->M; 1183 info->columns_global = (double)matin->N; 1184 info->rows_local = (double)matin->m; 1185 info->columns_local = (double)matin->N; 1186 1187 PetscFunctionReturn(0); 1188 } 1189 1190 #undef __FUNCT__ 1191 #define __FUNCT__ "MatSetOption_MPIAIJ" 1192 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op) 1193 { 1194 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1195 PetscErrorCode ierr; 1196 1197 PetscFunctionBegin; 1198 switch (op) { 1199 case MAT_NO_NEW_NONZERO_LOCATIONS: 1200 case MAT_YES_NEW_NONZERO_LOCATIONS: 1201 case MAT_COLUMNS_UNSORTED: 1202 case MAT_COLUMNS_SORTED: 1203 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1204 case MAT_KEEP_ZEROED_ROWS: 1205 case MAT_NEW_NONZERO_LOCATION_ERR: 1206 case MAT_USE_INODES: 1207 case MAT_DO_NOT_USE_INODES: 1208 case MAT_IGNORE_ZERO_ENTRIES: 1209 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1210 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1211 break; 1212 case MAT_ROW_ORIENTED: 1213 a->roworiented = PETSC_TRUE; 1214 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1215 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1216 break; 1217 case MAT_ROWS_SORTED: 1218 case MAT_ROWS_UNSORTED: 1219 case MAT_YES_NEW_DIAGONALS: 1220 ierr = PetscLogInfo((A,"MatSetOption_MPIAIJ:Option ignored\n"));CHKERRQ(ierr); 1221 break; 1222 case MAT_COLUMN_ORIENTED: 1223 a->roworiented = PETSC_FALSE; 1224 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1225 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1226 break; 1227 case MAT_IGNORE_OFF_PROC_ENTRIES: 1228 a->donotstash = PETSC_TRUE; 1229 break; 1230 case MAT_NO_NEW_DIAGONALS: 1231 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1232 case MAT_SYMMETRIC: 1233 case MAT_STRUCTURALLY_SYMMETRIC: 1234 case MAT_HERMITIAN: 1235 case MAT_SYMMETRY_ETERNAL: 1236 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1237 break; 1238 case MAT_NOT_SYMMETRIC: 1239 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1240 case MAT_NOT_HERMITIAN: 1241 case MAT_NOT_SYMMETRY_ETERNAL: 1242 break; 1243 default: 1244 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1245 } 1246 PetscFunctionReturn(0); 1247 } 1248 1249 #undef __FUNCT__ 1250 #define __FUNCT__ "MatGetRow_MPIAIJ" 1251 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1252 { 1253 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1254 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1255 PetscErrorCode ierr; 1256 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1257 PetscInt nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = 
mat->rend; 1258 PetscInt *cmap,*idx_p; 1259 1260 PetscFunctionBegin; 1261 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1262 mat->getrowactive = PETSC_TRUE; 1263 1264 if (!mat->rowvalues && (idx || v)) { 1265 /* 1266 allocate enough space to hold information from the longest row. 1267 */ 1268 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1269 PetscInt max = 1,tmp; 1270 for (i=0; i<matin->m; i++) { 1271 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1272 if (max < tmp) { max = tmp; } 1273 } 1274 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1275 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1276 } 1277 1278 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1279 lrow = row - rstart; 1280 1281 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1282 if (!v) {pvA = 0; pvB = 0;} 1283 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1284 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1285 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1286 nztot = nzA + nzB; 1287 1288 cmap = mat->garray; 1289 if (v || idx) { 1290 if (nztot) { 1291 /* Sort by increasing column numbers, assuming A and B already sorted */ 1292 PetscInt imark = -1; 1293 if (v) { 1294 *v = v_p = mat->rowvalues; 1295 for (i=0; i<nzB; i++) { 1296 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1297 else break; 1298 } 1299 imark = i; 1300 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1301 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1302 } 1303 if (idx) { 1304 *idx = idx_p = mat->rowindices; 1305 if (imark > -1) { 1306 for (i=0; i<imark; i++) { 1307 idx_p[i] = cmap[cworkB[i]]; 1308 } 1309 } else { 1310 for (i=0; i<nzB; i++) { 1311 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1312 else break; 1313 } 1314 imark = i; 1315 } 1316 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1317 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1318 } 1319 } else { 1320 if (idx) *idx = 0; 1321 if (v) *v = 0; 1322 } 1323 } 1324 *nz = nztot; 1325 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1326 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1327 PetscFunctionReturn(0); 1328 } 1329 1330 #undef __FUNCT__ 1331 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1332 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1333 { 1334 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1335 1336 PetscFunctionBegin; 1337 if (!aij->getrowactive) { 1338 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1339 } 1340 aij->getrowactive = PETSC_FALSE; 1341 PetscFunctionReturn(0); 1342 } 1343 1344 #undef __FUNCT__ 1345 #define __FUNCT__ "MatNorm_MPIAIJ" 1346 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1347 { 1348 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1349 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1350 PetscErrorCode ierr; 1351 PetscInt i,j,cstart = aij->cstart; 1352 PetscReal sum = 0.0; 1353 PetscScalar *v; 1354 1355 PetscFunctionBegin; 1356 if (aij->size == 1) { 1357 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1358 } else { 1359 if (type == NORM_FROBENIUS) { 1360 v = amat->a; 1361 for (i=0; i<amat->nz; i++) { 1362 #if defined(PETSC_USE_COMPLEX) 1363 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1364 #else 1365 sum += (*v)*(*v); v++; 1366 #endif 1367 } 1368 v = 
bmat->a; 1369 for (i=0; i<bmat->nz; i++) { 1370 #if defined(PETSC_USE_COMPLEX) 1371 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1372 #else 1373 sum += (*v)*(*v); v++; 1374 #endif 1375 } 1376 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1377 *norm = sqrt(*norm); 1378 } else if (type == NORM_1) { /* max column norm */ 1379 PetscReal *tmp,*tmp2; 1380 PetscInt *jj,*garray = aij->garray; 1381 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1382 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1383 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1384 *norm = 0.0; 1385 v = amat->a; jj = amat->j; 1386 for (j=0; j<amat->nz; j++) { 1387 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1388 } 1389 v = bmat->a; jj = bmat->j; 1390 for (j=0; j<bmat->nz; j++) { 1391 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1392 } 1393 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1394 for (j=0; j<mat->N; j++) { 1395 if (tmp2[j] > *norm) *norm = tmp2[j]; 1396 } 1397 ierr = PetscFree(tmp);CHKERRQ(ierr); 1398 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1399 } else if (type == NORM_INFINITY) { /* max row norm */ 1400 PetscReal ntemp = 0.0; 1401 for (j=0; j<aij->A->m; j++) { 1402 v = amat->a + amat->i[j]; 1403 sum = 0.0; 1404 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1405 sum += PetscAbsScalar(*v); v++; 1406 } 1407 v = bmat->a + bmat->i[j]; 1408 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1409 sum += PetscAbsScalar(*v); v++; 1410 } 1411 if (sum > ntemp) ntemp = sum; 1412 } 1413 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1414 } else { 1415 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1416 } 1417 } 1418 PetscFunctionReturn(0); 1419 } 1420 1421 #undef __FUNCT__ 1422 #define __FUNCT__ "MatTranspose_MPIAIJ" 1423 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1424 { 1425 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1426 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1427 PetscErrorCode ierr; 1428 PetscInt M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1429 Mat B; 1430 PetscScalar *array; 1431 1432 PetscFunctionBegin; 1433 if (!matout && M != N) { 1434 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1435 } 1436 1437 ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr); 1438 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1439 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1440 1441 /* copy over the A part */ 1442 Aloc = (Mat_SeqAIJ*)a->A->data; 1443 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1444 row = a->rstart; 1445 for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;} 1446 for (i=0; i<m; i++) { 1447 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1448 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1449 } 1450 aj = Aloc->j; 1451 for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;} 1452 1453 /* copy over the B part */ 1454 Aloc = (Mat_SeqAIJ*)a->B->data; 1455 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1456 row = a->rstart; 1457 ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1458 ct = cols; 1459 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1460 for (i=0; i<m; i++) { 1461 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1462 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1463 } 1464 ierr = PetscFree(ct);CHKERRQ(ierr); 1465 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1466 ierr = 
MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1467 if (matout) { 1468 *matout = B; 1469 } else { 1470 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1471 } 1472 PetscFunctionReturn(0); 1473 } 1474 1475 #undef __FUNCT__ 1476 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1477 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1478 { 1479 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1480 Mat a = aij->A,b = aij->B; 1481 PetscErrorCode ierr; 1482 PetscInt s1,s2,s3; 1483 1484 PetscFunctionBegin; 1485 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1486 if (rr) { 1487 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1488 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1489 /* Overlap communication with computation. */ 1490 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1491 } 1492 if (ll) { 1493 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1494 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1495 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1496 } 1497 /* scale the diagonal block */ 1498 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1499 1500 if (rr) { 1501 /* Do a scatter end and then right scale the off-diagonal block */ 1502 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1503 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1504 } 1505 1506 PetscFunctionReturn(0); 1507 } 1508 1509 1510 #undef __FUNCT__ 1511 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1512 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A) 1513 { 1514 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1515 PetscErrorCode ierr; 1516 1517 PetscFunctionBegin; 1518 if (!a->rank) { 1519 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1520 } 1521 PetscFunctionReturn(0); 1522 } 1523 1524 #undef __FUNCT__ 1525 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1526 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1527 { 1528 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1529 PetscErrorCode ierr; 1530 1531 PetscFunctionBegin; 1532 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1533 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1534 PetscFunctionReturn(0); 1535 } 1536 #undef __FUNCT__ 1537 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1538 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1539 { 1540 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1541 PetscErrorCode ierr; 1542 1543 PetscFunctionBegin; 1544 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1545 PetscFunctionReturn(0); 1546 } 1547 1548 #undef __FUNCT__ 1549 #define __FUNCT__ "MatEqual_MPIAIJ" 1550 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1551 { 1552 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1553 Mat a,b,c,d; 1554 PetscTruth flg; 1555 PetscErrorCode ierr; 1556 1557 PetscFunctionBegin; 1558 a = matA->A; b = matA->B; 1559 c = matB->A; d = matB->B; 1560 1561 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1562 if (flg) { 1563 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1564 } 1565 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1566 PetscFunctionReturn(0); 1567 } 1568 1569 #undef __FUNCT__ 1570 #define __FUNCT__ "MatCopy_MPIAIJ" 1571 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1572 { 1573 PetscErrorCode ierr; 1574 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1575 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1576 1577 PetscFunctionBegin; 1578 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 1579 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1580 /* because of the column compression in the off-processor part of the matrix a->B, 1581 the number of columns in a->B and b->B may be different, hence we cannot call 1582 the MatCopy() directly on the two parts. If need be, we can provide a more 1583 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1584 then copying the submatrices */ 1585 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1586 } else { 1587 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1588 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1589 } 1590 PetscFunctionReturn(0); 1591 } 1592 1593 #undef __FUNCT__ 1594 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1595 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1596 { 1597 PetscErrorCode ierr; 1598 1599 PetscFunctionBegin; 1600 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1601 PetscFunctionReturn(0); 1602 } 1603 1604 #include "petscblaslapack.h" 1605 #undef __FUNCT__ 1606 #define __FUNCT__ "MatAXPY_MPIAIJ" 1607 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str) 1608 { 1609 PetscErrorCode ierr; 1610 PetscInt i; 1611 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1612 PetscBLASInt bnz,one=1; 1613 Mat_SeqAIJ *x,*y; 1614 1615 PetscFunctionBegin; 1616 if (str == SAME_NONZERO_PATTERN) { 1617 x = (Mat_SeqAIJ *)xx->A->data; 1618 y = (Mat_SeqAIJ *)yy->A->data; 1619 bnz = (PetscBLASInt)x->nz; 1620 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1621 x = (Mat_SeqAIJ *)xx->B->data; 1622 y = (Mat_SeqAIJ *)yy->B->data; 1623 bnz = (PetscBLASInt)x->nz; 1624 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1625 } else if (str == SUBSET_NONZERO_PATTERN) { 1626 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1627 1628 x = (Mat_SeqAIJ *)xx->B->data; 1629 y = (Mat_SeqAIJ *)yy->B->data; 1630 if (y->xtoy && y->XtoY != xx->B) { 1631 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1632 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1633 } 1634 if (!y->xtoy) { /* get xtoy */ 1635 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1636 y->XtoY = xx->B; 1637 } 1638 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1639 } else { 1640 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1641 } 1642 PetscFunctionReturn(0); 1643 } 1644 1645 /* -------------------------------------------------------------------*/ 1646 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1647 MatGetRow_MPIAIJ, 1648 MatRestoreRow_MPIAIJ, 1649 MatMult_MPIAIJ, 1650 /* 4*/ MatMultAdd_MPIAIJ, 1651 MatMultTranspose_MPIAIJ, 1652 MatMultTransposeAdd_MPIAIJ, 1653 0, 1654 0, 1655 0, 1656 /*10*/ 0, 1657 0, 1658 0, 1659 MatRelax_MPIAIJ, 1660 MatTranspose_MPIAIJ, 1661 /*15*/ MatGetInfo_MPIAIJ, 1662 MatEqual_MPIAIJ, 1663 MatGetDiagonal_MPIAIJ, 1664 MatDiagonalScale_MPIAIJ, 1665 MatNorm_MPIAIJ, 1666 /*20*/ MatAssemblyBegin_MPIAIJ, 1667 MatAssemblyEnd_MPIAIJ, 1668 0, 1669 MatSetOption_MPIAIJ, 1670 MatZeroEntries_MPIAIJ, 1671 /*25*/ MatZeroRows_MPIAIJ, 1672 0, 1673 0, 1674 0, 1675 0, 1676 /*30*/ MatSetUpPreallocation_MPIAIJ, 1677 0, 1678 0, 1679 0, 1680 0, 1681 /*35*/ MatDuplicate_MPIAIJ, 1682 0, 1683 0, 1684 0, 1685 0, 1686 /*40*/ MatAXPY_MPIAIJ, 1687 MatGetSubMatrices_MPIAIJ, 1688 MatIncreaseOverlap_MPIAIJ, 1689 MatGetValues_MPIAIJ, 1690 MatCopy_MPIAIJ, 1691 /*45*/ MatPrintHelp_MPIAIJ, 1692 MatScale_MPIAIJ, 1693 0, 1694 0, 1695 0, 1696 /*50*/ MatSetBlockSize_MPIAIJ, 1697 
0, 1698 0, 1699 0, 1700 0, 1701 /*55*/ MatFDColoringCreate_MPIAIJ, 1702 0, 1703 MatSetUnfactored_MPIAIJ, 1704 0, 1705 0, 1706 /*60*/ MatGetSubMatrix_MPIAIJ, 1707 MatDestroy_MPIAIJ, 1708 MatView_MPIAIJ, 1709 MatGetPetscMaps_Petsc, 1710 0, 1711 /*65*/ 0, 1712 0, 1713 0, 1714 0, 1715 0, 1716 /*70*/ 0, 1717 0, 1718 MatSetColoring_MPIAIJ, 1719 #if defined(PETSC_HAVE_ADIC) 1720 MatSetValuesAdic_MPIAIJ, 1721 #else 1722 0, 1723 #endif 1724 MatSetValuesAdifor_MPIAIJ, 1725 /*75*/ 0, 1726 0, 1727 0, 1728 0, 1729 0, 1730 /*80*/ 0, 1731 0, 1732 0, 1733 0, 1734 /*84*/ MatLoad_MPIAIJ, 1735 0, 1736 0, 1737 0, 1738 0, 1739 0, 1740 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1741 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1742 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1743 MatPtAP_Basic, 1744 MatPtAPSymbolic_MPIAIJ, 1745 /*95*/ MatPtAPNumeric_MPIAIJ, 1746 0, 1747 0, 1748 0, 1749 0, 1750 /*100*/0, 1751 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1752 MatPtAPNumeric_MPIAIJ_MPIAIJ, 1753 }; 1754 1755 /* ----------------------------------------------------------------------------------------*/ 1756 1757 EXTERN_C_BEGIN 1758 #undef __FUNCT__ 1759 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1760 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 1761 { 1762 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1763 PetscErrorCode ierr; 1764 1765 PetscFunctionBegin; 1766 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1767 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1768 PetscFunctionReturn(0); 1769 } 1770 EXTERN_C_END 1771 1772 EXTERN_C_BEGIN 1773 #undef __FUNCT__ 1774 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1775 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 1776 { 1777 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1778 PetscErrorCode ierr; 1779 1780 PetscFunctionBegin; 1781 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1782 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1783 PetscFunctionReturn(0); 1784 } 1785 EXTERN_C_END 1786 1787 #include "petscpc.h" 1788 EXTERN_C_BEGIN 1789 #undef __FUNCT__ 1790 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1791 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1792 { 1793 Mat_MPIAIJ *b; 1794 PetscErrorCode ierr; 1795 PetscInt i; 1796 1797 PetscFunctionBegin; 1798 B->preallocated = PETSC_TRUE; 1799 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1800 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1801 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1802 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1803 if (d_nnz) { 1804 for (i=0; i<B->m; i++) { 1805 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1806 } 1807 } 1808 if (o_nnz) { 1809 for (i=0; i<B->m; i++) { 1810 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1811 } 1812 } 1813 b = (Mat_MPIAIJ*)B->data; 1814 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1815 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1816 1817 PetscFunctionReturn(0); 1818 } 1819 EXTERN_C_END 1820 1821 #undef __FUNCT__ 1822 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1823 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1824 { 1825 Mat mat; 1826 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1827 PetscErrorCode ierr; 1828 1829 
PetscFunctionBegin; 1830 *newmat = 0; 1831 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1832 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1833 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1834 a = (Mat_MPIAIJ*)mat->data; 1835 1836 mat->factor = matin->factor; 1837 mat->bs = matin->bs; 1838 mat->assembled = PETSC_TRUE; 1839 mat->insertmode = NOT_SET_VALUES; 1840 mat->preallocated = PETSC_TRUE; 1841 1842 a->rstart = oldmat->rstart; 1843 a->rend = oldmat->rend; 1844 a->cstart = oldmat->cstart; 1845 a->cend = oldmat->cend; 1846 a->size = oldmat->size; 1847 a->rank = oldmat->rank; 1848 a->donotstash = oldmat->donotstash; 1849 a->roworiented = oldmat->roworiented; 1850 a->rowindices = 0; 1851 a->rowvalues = 0; 1852 a->getrowactive = PETSC_FALSE; 1853 1854 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 1855 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1856 if (oldmat->colmap) { 1857 #if defined (PETSC_USE_CTABLE) 1858 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1859 #else 1860 ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1861 ierr = PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1862 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1863 #endif 1864 } else a->colmap = 0; 1865 if (oldmat->garray) { 1866 PetscInt len; 1867 len = oldmat->B->n; 1868 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1869 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 1870 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 1871 } else a->garray = 0; 1872 1873 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1874 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 1875 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1876 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 1877 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1878 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1879 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1880 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1881 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1882 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 1883 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1884 *newmat = mat; 1885 PetscFunctionReturn(0); 1886 } 1887 1888 #include "petscsys.h" 1889 1890 #undef __FUNCT__ 1891 #define __FUNCT__ "MatLoad_MPIAIJ" 1892 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 1893 { 1894 Mat A; 1895 PetscScalar *vals,*svals; 1896 MPI_Comm comm = ((PetscObject)viewer)->comm; 1897 MPI_Status status; 1898 PetscErrorCode ierr; 1899 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 1900 PetscInt i,nz,j,rstart,rend,mmax; 1901 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 1902 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1903 PetscInt cend,cstart,n,*rowners; 1904 int fd; 1905 1906 PetscFunctionBegin; 1907 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1908 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1909 if (!rank) { 1910 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1911 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1912 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1913 } 1914 1915 ierr = 
MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1916 M = header[1]; N = header[2]; 1917 /* determine ownership of all rows */ 1918 m = M/size + ((M % size) > rank); 1919 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1920 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1921 1922 /* First process needs enough room for process with most rows */ 1923 if (!rank) { 1924 mmax = rowners[1]; 1925 for (i=2; i<size; i++) { 1926 mmax = PetscMax(mmax,rowners[i]); 1927 } 1928 } else mmax = m; 1929 1930 rowners[0] = 0; 1931 for (i=2; i<=size; i++) { 1932 mmax = PetscMax(mmax,rowners[i]); 1933 rowners[i] += rowners[i-1]; 1934 } 1935 rstart = rowners[rank]; 1936 rend = rowners[rank+1]; 1937 1938 /* distribute row lengths to all processors */ 1939 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 1940 if (!rank) { 1941 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 1942 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1943 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 1944 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1945 for (j=0; j<m; j++) { 1946 procsnz[0] += ourlens[j]; 1947 } 1948 for (i=1; i<size; i++) { 1949 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 1950 /* calculate the number of nonzeros on each processor */ 1951 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 1952 procsnz[i] += rowlengths[j]; 1953 } 1954 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1955 } 1956 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1957 } else { 1958 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1959 } 1960 1961 if (!rank) { 1962 /* determine max buffer needed and allocate it */ 1963 maxnz = 0; 1964 for (i=0; i<size; i++) { 1965 maxnz = PetscMax(maxnz,procsnz[i]); 1966 } 1967 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1968 1969 /* read in my part of the matrix column indices */ 1970 nz = procsnz[0]; 1971 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1972 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1973 1974 /* read in every one elses and ship off */ 1975 for (i=1; i<size; i++) { 1976 nz = procsnz[i]; 1977 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1978 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1979 } 1980 ierr = PetscFree(cols);CHKERRQ(ierr); 1981 } else { 1982 /* determine buffer space needed for message */ 1983 nz = 0; 1984 for (i=0; i<m; i++) { 1985 nz += ourlens[i]; 1986 } 1987 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1988 1989 /* receive message of column indices*/ 1990 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1991 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1992 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1993 } 1994 1995 /* determine column ownership if matrix is not square */ 1996 if (N != M) { 1997 n = N/size + ((N % size) > rank); 1998 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1999 cstart = cend - n; 2000 } else { 2001 cstart = rstart; 2002 cend = rend; 2003 n = cend - cstart; 2004 } 2005 2006 /* loop over local rows, determining number of off diagonal entries */ 2007 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2008 jj = 0; 2009 for (i=0; i<m; i++) { 2010 for (j=0; j<ourlens[i]; j++) { 2011 if (mycols[jj] < cstart 
|| mycols[jj] >= cend) offlens[i]++; 2012 jj++; 2013 } 2014 } 2015 2016 /* create our matrix */ 2017 for (i=0; i<m; i++) { 2018 ourlens[i] -= offlens[i]; 2019 } 2020 ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr); 2021 ierr = MatSetType(A,type);CHKERRQ(ierr); 2022 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2023 2024 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2025 for (i=0; i<m; i++) { 2026 ourlens[i] += offlens[i]; 2027 } 2028 2029 if (!rank) { 2030 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2031 2032 /* read in my part of the matrix numerical values */ 2033 nz = procsnz[0]; 2034 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2035 2036 /* insert into matrix */ 2037 jj = rstart; 2038 smycols = mycols; 2039 svals = vals; 2040 for (i=0; i<m; i++) { 2041 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2042 smycols += ourlens[i]; 2043 svals += ourlens[i]; 2044 jj++; 2045 } 2046 2047 /* read in other processors and ship out */ 2048 for (i=1; i<size; i++) { 2049 nz = procsnz[i]; 2050 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2051 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2052 } 2053 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2054 } else { 2055 /* receive numeric values */ 2056 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2057 2058 /* receive message of values*/ 2059 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2060 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2061 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2062 2063 /* insert into matrix */ 2064 jj = rstart; 2065 smycols = mycols; 2066 svals = vals; 2067 for (i=0; i<m; i++) { 2068 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2069 smycols += ourlens[i]; 2070 svals += ourlens[i]; 2071 jj++; 2072 } 2073 } 2074 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2075 ierr = PetscFree(vals);CHKERRQ(ierr); 2076 ierr = PetscFree(mycols);CHKERRQ(ierr); 2077 ierr = PetscFree(rowners);CHKERRQ(ierr); 2078 2079 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2080 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2081 *newmat = A; 2082 PetscFunctionReturn(0); 2083 } 2084 2085 #undef __FUNCT__ 2086 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2087 /* 2088 Not great since it makes two copies of the submatrix, first an SeqAIJ 2089 in local and then by concatenating the local matrices the end result. 
2090 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2091 */ 2092 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2093 { 2094 PetscErrorCode ierr; 2095 PetscMPIInt rank,size; 2096 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2097 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2098 Mat *local,M,Mreuse; 2099 PetscScalar *vwork,*aa; 2100 MPI_Comm comm = mat->comm; 2101 Mat_SeqAIJ *aij; 2102 2103 2104 PetscFunctionBegin; 2105 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2106 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2107 2108 if (call == MAT_REUSE_MATRIX) { 2109 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2110 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2111 local = &Mreuse; 2112 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2113 } else { 2114 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2115 Mreuse = *local; 2116 ierr = PetscFree(local);CHKERRQ(ierr); 2117 } 2118 2119 /* 2120 m - number of local rows 2121 n - number of columns (same on all processors) 2122 rstart - first row in new global matrix generated 2123 */ 2124 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2125 if (call == MAT_INITIAL_MATRIX) { 2126 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2127 ii = aij->i; 2128 jj = aij->j; 2129 2130 /* 2131 Determine the number of non-zeros in the diagonal and off-diagonal 2132 portions of the matrix in order to do correct preallocation 2133 */ 2134 2135 /* first get start and end of "diagonal" columns */ 2136 if (csize == PETSC_DECIDE) { 2137 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2138 if (mglobal == n) { /* square matrix */ 2139 nlocal = m; 2140 } else { 2141 nlocal = n/size + ((n % size) > rank); 2142 } 2143 } else { 2144 nlocal = csize; 2145 } 2146 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2147 rstart = rend - nlocal; 2148 if (rank == size - 1 && rend != n) { 2149 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2150 } 2151 2152 /* next, compute all the lengths */ 2153 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2154 olens = dlens + m; 2155 for (i=0; i<m; i++) { 2156 jend = ii[i+1] - ii[i]; 2157 olen = 0; 2158 dlen = 0; 2159 for (j=0; j<jend; j++) { 2160 if (*jj < rstart || *jj >= rend) olen++; 2161 else dlen++; 2162 jj++; 2163 } 2164 olens[i] = olen; 2165 dlens[i] = dlen; 2166 } 2167 ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr); 2168 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2169 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2170 ierr = PetscFree(dlens);CHKERRQ(ierr); 2171 } else { 2172 PetscInt ml,nl; 2173 2174 M = *newmat; 2175 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2176 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2177 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2178 /* 2179 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2180 rather than the slower MatSetValues(). 
2181 */ 2182 M->was_assembled = PETSC_TRUE; 2183 M->assembled = PETSC_FALSE; 2184 } 2185 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2186 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2187 ii = aij->i; 2188 jj = aij->j; 2189 aa = aij->a; 2190 for (i=0; i<m; i++) { 2191 row = rstart + i; 2192 nz = ii[i+1] - ii[i]; 2193 cwork = jj; jj += nz; 2194 vwork = aa; aa += nz; 2195 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2196 } 2197 2198 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2199 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2200 *newmat = M; 2201 2202 /* save submatrix used in processor for next request */ 2203 if (call == MAT_INITIAL_MATRIX) { 2204 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2205 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2206 } 2207 2208 PetscFunctionReturn(0); 2209 } 2210 2211 EXTERN_C_BEGIN 2212 #undef __FUNCT__ 2213 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2214 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2215 { 2216 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2217 PetscInt m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d; 2218 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii; 2219 const PetscInt *JJ; 2220 PetscScalar *values; 2221 PetscErrorCode ierr; 2222 2223 PetscFunctionBegin; 2224 #if defined(PETSC_OPT_g) 2225 if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]); 2226 #endif 2227 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2228 o_nnz = d_nnz + m; 2229 2230 for (i=0; i<m; i++) { 2231 nnz = I[i+1]- I[i]; 2232 JJ = J + I[i]; 2233 nnz_max = PetscMax(nnz_max,nnz); 2234 #if defined(PETSC_OPT_g) 2235 if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 2236 #endif 2237 for (j=0; j<nnz; j++) { 2238 if (*JJ >= cstart) break; 2239 JJ++; 2240 } 2241 d = 0; 2242 for (; j<nnz; j++) { 2243 if (*JJ++ >= cend) break; 2244 d++; 2245 } 2246 d_nnz[i] = d; 2247 o_nnz[i] = nnz - d; 2248 } 2249 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2250 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2251 2252 if (v) values = (PetscScalar*)v; 2253 else { 2254 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2255 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2256 } 2257 2258 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2259 for (i=0; i<m; i++) { 2260 ii = i + rstart; 2261 nnz = I[i+1]- I[i]; 2262 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr); 2263 } 2264 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2265 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2266 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2267 2268 if (!v) { 2269 ierr = PetscFree(values);CHKERRQ(ierr); 2270 } 2271 PetscFunctionReturn(0); 2272 } 2273 EXTERN_C_END 2274 2275 #undef __FUNCT__ 2276 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2277 /*@C 2278 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2279 (the default parallel PETSc format). 2280 2281 Collective on MPI_Comm 2282 2283 Input Parameters: 2284 + A - the matrix 2285 . i - the indices into j for the start of each local row (starts with zero) 2286 . 
   j - the column indices for each local row (starts with zero); these must be sorted for each row
-  v - optional values in the matrix

   Level: developer

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[],const PetscScalar v[])
{
  PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  d_nz - number of nonzeros per row in the DIAGONAL portion of the local submatrix
          (the same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of the local
          submatrix (the same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.

   If a *_nnz parameter is given then the corresponding *_nz parameter is ignored.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)) is fully compatible with standard Fortran 77
   storage. The stored row and column indices begin with zero. See the users manual for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
   Proc0    0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
   -------------------------------------
           13  0 14  | 15 16 17  |  0  0
   Proc1    0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
   -------------------------------------
   Proc2   25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   The submatrices A,B,C are owned by proc0, D,E,F are owned by proc1,
   and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 stores [E] as one SeqAIJ matrix
   and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.
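
   A minimal calling sequence for proc1 in the example above might look as
   follows (a sketch; it assumes the matrix B has already been created with
   MatCreate() and MatSetType(), and the zeros passed for d_nz and o_nz are
   ignored because the nnz arrays are given):
.vb
     PetscInt d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};
     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve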

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given).
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given).
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  d_nz - number of nonzeros per row in the DIAGONAL portion of the local submatrix
          (the same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of the local
          submatrix (the same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.

   Output Parameter:
.  A - the matrix

   Notes:
   If a *_nnz parameter is given then the corresponding *_nz parameter is ignored.

   The m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).
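
   For example, a call that lets PETSc choose the row and column distribution and
   preallocates 5 nonzeros per row in the diagonal block and 2 per row in the
   off-diagonal block might look as follows (a sketch; the global sizes M and N
   are assumed to be set by the application):
.vb
     Mat A;
     ierr = MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
.ve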

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism:
   MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0. Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
   Proc0    0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
   -------------------------------------
           13  0 14  | 15 16 17  |  0  0
   Proc1    0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
   -------------------------------------
   Proc2   25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   The submatrices A,B,C are owned by proc0, D,E,F are owned by proc1,
   and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 stores [E] as one SeqAIJ matrix
   and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2572 In this case, the values of d_nz,o_nz are: 2573 .vb 2574 proc0 : dnz = 2, o_nz = 2 2575 proc1 : dnz = 3, o_nz = 2 2576 proc2 : dnz = 1, o_nz = 4 2577 .ve 2578 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2579 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2580 for proc3. i.e we are using 12+15+10=37 storage locations to store 2581 34 values. 2582 2583 When d_nnz, o_nnz parameters are specified, the storage is specified 2584 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2585 In the above case the values for d_nnz,o_nnz are: 2586 .vb 2587 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2588 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2589 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2590 .ve 2591 Here the space allocated is sum of all the above values i.e 34, and 2592 hence pre-allocation is perfect. 2593 2594 Level: intermediate 2595 2596 .keywords: matrix, aij, compressed row, sparse, parallel 2597 2598 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2599 MPIAIJ 2600 @*/ 2601 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2602 { 2603 PetscErrorCode ierr; 2604 PetscMPIInt size; 2605 2606 PetscFunctionBegin; 2607 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2608 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2609 if (size > 1) { 2610 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2611 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2612 } else { 2613 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2614 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2615 } 2616 PetscFunctionReturn(0); 2617 } 2618 2619 #undef __FUNCT__ 2620 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2621 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2622 { 2623 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2624 2625 PetscFunctionBegin; 2626 *Ad = a->A; 2627 *Ao = a->B; 2628 *colmap = a->garray; 2629 PetscFunctionReturn(0); 2630 } 2631 2632 #undef __FUNCT__ 2633 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2634 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2635 { 2636 PetscErrorCode ierr; 2637 PetscInt i; 2638 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2639 2640 PetscFunctionBegin; 2641 if (coloring->ctype == IS_COLORING_LOCAL) { 2642 ISColoringValue *allcolors,*colors; 2643 ISColoring ocoloring; 2644 2645 /* set coloring for diagonal portion */ 2646 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2647 2648 /* set coloring for off-diagonal portion */ 2649 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2650 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2651 for (i=0; i<a->B->n; i++) { 2652 colors[i] = allcolors[a->garray[i]]; 2653 } 2654 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2655 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2656 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2657 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2658 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2659 ISColoringValue *colors; 2660 PetscInt *larray; 2661 ISColoring ocoloring; 2662 2663 /* set coloring for diagonal portion */ 2664 ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2665 for (i=0; 
i<a->A->n; i++) {
      larray[i] = i + a->cstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
  }

  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ADIC)
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge"
/*@C
      MatMerge - Creates a single large PETSc matrix by concatenating the sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    inmat - the input sequential matrix (one per process)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    outmat - the parallel matrix generated

    Level: advanced

   Notes: The number of columns of the matrix in EACH processor MUST be the same.
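
   A typical use might look as follows (a sketch; Aseq is a SeqAIJ matrix that each
   process has assembled locally, and note that the implementation below destroys
   the input matrix with MatDestroy() once it has been merged):
.vb
     Mat Apar;
     ierr = MatMerge(comm,Aseq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Apar);CHKERRQ(ierr);
.ve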
2744 2745 @*/ 2746 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2747 { 2748 PetscErrorCode ierr; 2749 PetscInt m,N,i,rstart,nnz,I,*dnz,*onz; 2750 PetscInt *indx; 2751 PetscScalar *values; 2752 PetscMap columnmap,rowmap; 2753 2754 PetscFunctionBegin; 2755 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2756 /* 2757 PetscMPIInt rank; 2758 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2759 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N); 2760 */ 2761 if (scall == MAT_INITIAL_MATRIX){ 2762 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2763 if (n == PETSC_DECIDE){ 2764 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2765 ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr); 2766 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2767 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2768 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2769 } 2770 2771 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2772 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2773 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2774 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2775 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2776 2777 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2778 for (i=0;i<m;i++) { 2779 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2780 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2781 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2782 } 2783 /* This routine will ONLY return MPIAIJ type matrix */ 2784 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr); 2785 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2786 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2787 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2788 2789 } else if (scall == MAT_REUSE_MATRIX){ 2790 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2791 } else { 2792 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 2793 } 2794 2795 for (i=0;i<m;i++) { 2796 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2797 I = i + rstart; 2798 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2799 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2800 } 2801 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2802 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2803 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2804 2805 PetscFunctionReturn(0); 2806 } 2807 2808 #undef __FUNCT__ 2809 #define __FUNCT__ "MatFileSplit" 2810 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2811 { 2812 PetscErrorCode ierr; 2813 PetscMPIInt rank; 2814 PetscInt m,N,i,rstart,nnz; 2815 size_t len; 2816 const PetscInt *indx; 2817 PetscViewer out; 2818 char *name; 2819 Mat B; 2820 const PetscScalar *values; 2821 2822 PetscFunctionBegin; 2823 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2824 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2825 /* Should this be the type of the diagonal block of A? 
*/ 2826 ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr); 2827 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2828 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2829 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2830 for (i=0;i<m;i++) { 2831 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2832 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2833 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2834 } 2835 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2836 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2837 2838 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2839 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2840 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2841 sprintf(name,"%s.%d",outfile,rank); 2842 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr); 2843 ierr = PetscFree(name); 2844 ierr = MatView(B,out);CHKERRQ(ierr); 2845 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2846 ierr = MatDestroy(B);CHKERRQ(ierr); 2847 PetscFunctionReturn(0); 2848 } 2849 2850 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 2851 #undef __FUNCT__ 2852 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 2853 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 2854 { 2855 PetscErrorCode ierr; 2856 Mat_Merge_SeqsToMPI *merge; 2857 PetscObjectContainer container; 2858 2859 PetscFunctionBegin; 2860 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2861 if (container) { 2862 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2863 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 2864 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 2865 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 2866 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 2867 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 2868 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 2869 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 2870 ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr); 2871 if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);} 2872 if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);} 2873 if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);} 2874 2875 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 2876 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 2877 } 2878 ierr = PetscFree(merge);CHKERRQ(ierr); 2879 2880 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 2881 PetscFunctionReturn(0); 2882 } 2883 2884 #include "src/mat/utils/freespace.h" 2885 #include "petscbt.h" 2886 static PetscEvent logkey_seqstompinum = 0; 2887 #undef __FUNCT__ 2888 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 2889 /*@C 2890 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 2891 matrices from each processor 2892 2893 Collective on MPI_Comm 2894 2895 Input Parameters: 2896 + comm - the communicators the parallel matrix will live on 2897 . seqmat - the input sequential matrices 2898 . m - number of local rows (or PETSC_DECIDE) 2899 . n - number of local columns (or PETSC_DECIDE) 2900 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2901 2902 Output Parameter: 2903 . mpimat - the parallel matrix generated 2904 2905 Level: advanced 2906 2907 Notes: 2908 The dimensions of the sequential matrix in each processor MUST be the same. 
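   A typical calling sequence uses the symbolic/numeric pair defined below
   (a sketch; seqmat is the local sequential matrix and m,n are the requested
   local sizes or PETSC_DECIDE):
.vb
     Mat mpimat;
     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&mpimat);CHKERRQ(ierr);
     ierr = MatMerge_SeqsToMPINumeric(seqmat,mpimat);CHKERRQ(ierr);
.ve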
2909 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 2910 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 2911 @*/ 2912 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 2913 { 2914 PetscErrorCode ierr; 2915 MPI_Comm comm=mpimat->comm; 2916 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2917 PetscMPIInt size,rank,taga,*len_s; 2918 PetscInt N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j; 2919 PetscInt proc,m; 2920 PetscInt **buf_ri,**buf_rj; 2921 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 2922 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 2923 MPI_Request *s_waits,*r_waits; 2924 MPI_Status *status; 2925 MatScalar *aa=a->a,**abuf_r,*ba_i; 2926 Mat_Merge_SeqsToMPI *merge; 2927 PetscObjectContainer container; 2928 2929 PetscFunctionBegin; 2930 if (!logkey_seqstompinum) { 2931 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 2932 } 2933 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 2934 2935 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2936 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2937 2938 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2939 if (container) { 2940 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2941 } 2942 bi = merge->bi; 2943 bj = merge->bj; 2944 buf_ri = merge->buf_ri; 2945 buf_rj = merge->buf_rj; 2946 2947 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 2948 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 2949 len_s = merge->len_s; 2950 2951 /* send and recv matrix values */ 2952 /*-----------------------------*/ 2953 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 2954 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 2955 2956 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 2957 for (proc=0,k=0; proc<size; proc++){ 2958 if (!len_s[proc]) continue; 2959 i = owners[proc]; 2960 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 2961 k++; 2962 } 2963 2964 ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr); 2965 ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr); 2966 ierr = PetscFree(status);CHKERRQ(ierr); 2967 2968 ierr = PetscFree(s_waits);CHKERRQ(ierr); 2969 ierr = PetscFree(r_waits);CHKERRQ(ierr); 2970 2971 /* insert mat values of mpimat */ 2972 /*----------------------------*/ 2973 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 2974 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 2975 nextrow = buf_ri_k + merge->nrecv; 2976 nextai = nextrow + merge->nrecv; 2977 2978 for (k=0; k<merge->nrecv; k++){ 2979 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 2980 nrows = *(buf_ri_k[k]); 2981 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 2982 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 2983 } 2984 2985 /* set values of ba */ 2986 ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); 2987 for (i=0; i<m; i++) { 2988 arow = owners[rank] + i; 2989 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 2990 bnzi = bi[i+1] - bi[i]; 2991 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 2992 2993 /* add local non-zero vals of 
this proc's seqmat into ba */ 2994 anzi = ai[arow+1] - ai[arow]; 2995 aj = a->j + ai[arow]; 2996 aa = a->a + ai[arow]; 2997 nextaj = 0; 2998 for (j=0; nextaj<anzi; j++){ 2999 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3000 ba_i[j] += aa[nextaj++]; 3001 } 3002 } 3003 3004 /* add received vals into ba */ 3005 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3006 /* i-th row */ 3007 if (i == *nextrow[k]) { 3008 anzi = *(nextai[k]+1) - *nextai[k]; 3009 aj = buf_rj[k] + *(nextai[k]); 3010 aa = abuf_r[k] + *(nextai[k]); 3011 nextaj = 0; 3012 for (j=0; nextaj<anzi; j++){ 3013 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3014 ba_i[j] += aa[nextaj++]; 3015 } 3016 } 3017 nextrow[k]++; nextai[k]++; 3018 } 3019 } 3020 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3021 } 3022 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3023 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3024 3025 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3026 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3027 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3028 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3029 PetscFunctionReturn(0); 3030 } 3031 3032 static PetscEvent logkey_seqstompisym = 0; 3033 #undef __FUNCT__ 3034 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 3035 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3036 { 3037 PetscErrorCode ierr; 3038 Mat B_mpi; 3039 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3040 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3041 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3042 PetscInt M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j; 3043 PetscInt len,proc,*dnz,*onz; 3044 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3045 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3046 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3047 MPI_Status *status; 3048 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3049 PetscBT lnkbt; 3050 Mat_Merge_SeqsToMPI *merge; 3051 PetscObjectContainer container; 3052 3053 PetscFunctionBegin; 3054 if (!logkey_seqstompisym) { 3055 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3056 } 3057 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3058 3059 /* make sure it is a PETSc comm */ 3060 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 3061 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3062 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3063 3064 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3065 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3066 3067 /* determine row ownership */ 3068 /*---------------------------------------------------------*/ 3069 ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr); 3070 if (m == PETSC_DECIDE) { 3071 ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr); 3072 } else { 3073 ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 3074 } 3075 ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr); 3076 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3077 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3078 3079 if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); } 3080 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 3081 3082 /* determine the number of messages to send, their 
lengths */ 3083 /*---------------------------------------------------------*/ 3084 len_s = merge->len_s; 3085 3086 len = 0; /* length of buf_si[] */ 3087 merge->nsend = 0; 3088 for (proc=0; proc<size; proc++){ 3089 len_si[proc] = 0; 3090 if (proc == rank){ 3091 len_s[proc] = 0; 3092 } else { 3093 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3094 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3095 } 3096 if (len_s[proc]) { 3097 merge->nsend++; 3098 nrows = 0; 3099 for (i=owners[proc]; i<owners[proc+1]; i++){ 3100 if (ai[i+1] > ai[i]) nrows++; 3101 } 3102 len_si[proc] = 2*(nrows+1); 3103 len += len_si[proc]; 3104 } 3105 } 3106 3107 /* determine the number and length of messages to receive for ij-structure */ 3108 /*-------------------------------------------------------------------------*/ 3109 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3110 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3111 3112 /* post the Irecv of j-structure */ 3113 /*-------------------------------*/ 3114 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr); 3115 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3116 3117 /* post the Isend of j-structure */ 3118 /*--------------------------------*/ 3119 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3120 sj_waits = si_waits + merge->nsend; 3121 3122 for (proc=0, k=0; proc<size; proc++){ 3123 if (!len_s[proc]) continue; 3124 i = owners[proc]; 3125 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3126 k++; 3127 } 3128 3129 /* receives and sends of j-structure are complete */ 3130 /*------------------------------------------------*/ 3131 ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr); 3132 ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr); 3133 3134 /* send and recv i-structure */ 3135 /*---------------------------*/ 3136 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr); 3137 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3138 3139 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3140 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3141 for (proc=0,k=0; proc<size; proc++){ 3142 if (!len_s[proc]) continue; 3143 /* form outgoing message for i-structure: 3144 buf_si[0]: nrows to be sent 3145 [1:nrows]: row index (global) 3146 [nrows+1:2*nrows+1]: i-structure index 3147 */ 3148 /*-------------------------------------------*/ 3149 nrows = len_si[proc]/2 - 1; 3150 buf_si_i = buf_si + nrows+1; 3151 buf_si[0] = nrows; 3152 buf_si_i[0] = 0; 3153 nrows = 0; 3154 for (i=owners[proc]; i<owners[proc+1]; i++){ 3155 anzi = ai[i+1] - ai[i]; 3156 if (anzi) { 3157 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3158 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3159 nrows++; 3160 } 3161 } 3162 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3163 k++; 3164 buf_si += len_si[proc]; 3165 } 3166 3167 ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr); 3168 ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr); 3169 3170 ierr = PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: 
%D\n",merge->nsend,merge->nrecv));CHKERRQ(ierr); 3171 for (i=0; i<merge->nrecv; i++){ 3172 ierr = PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI: recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));CHKERRQ(ierr); 3173 } 3174 3175 ierr = PetscFree(len_si);CHKERRQ(ierr); 3176 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3177 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 3178 ierr = PetscFree(si_waits);CHKERRQ(ierr); 3179 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3180 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3181 ierr = PetscFree(status);CHKERRQ(ierr); 3182 3183 /* compute a local seq matrix in each processor */ 3184 /*----------------------------------------------*/ 3185 /* allocate bi array and free space for accumulating nonzero column info */ 3186 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3187 bi[0] = 0; 3188 3189 /* create and initialize a linked list */ 3190 nlnk = N+1; 3191 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3192 3193 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3194 len = 0; 3195 len = ai[owners[rank+1]] - ai[owners[rank]]; 3196 ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3197 current_space = free_space; 3198 3199 /* determine symbolic info for each local row */ 3200 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3201 nextrow = buf_ri_k + merge->nrecv; 3202 nextai = nextrow + merge->nrecv; 3203 for (k=0; k<merge->nrecv; k++){ 3204 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3205 nrows = *buf_ri_k[k]; 3206 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 3207 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3208 } 3209 3210 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3211 len = 0; 3212 for (i=0;i<m;i++) { 3213 bnzi = 0; 3214 /* add local non-zero cols of this proc's seqmat into lnk */ 3215 arow = owners[rank] + i; 3216 anzi = ai[arow+1] - ai[arow]; 3217 aj = a->j + ai[arow]; 3218 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3219 bnzi += nlnk; 3220 /* add received col data into lnk */ 3221 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3222 if (i == *nextrow[k]) { /* i-th row */ 3223 anzi = *(nextai[k]+1) - *nextai[k]; 3224 aj = buf_rj[k] + *nextai[k]; 3225 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3226 bnzi += nlnk; 3227 nextrow[k]++; nextai[k]++; 3228 } 3229 } 3230 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3231 3232 /* if free space is not available, make more free space */ 3233 if (current_space->local_remaining<bnzi) { 3234 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3235 nspacedouble++; 3236 } 3237 /* copy data into free space, then initialize lnk */ 3238 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3239 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3240 3241 current_space->array += bnzi; 3242 current_space->local_used += bnzi; 3243 current_space->local_remaining -= bnzi; 3244 3245 bi[i+1] = bi[i] + bnzi; 3246 } 3247 3248 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3249 3250 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3251 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3252 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3253 3254 /* create symbolic parallel matrix B_mpi */ 3255 /*---------------------------------------*/ 3256 if 
(n==PETSC_DECIDE) { 3257 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr); 3258 } else { 3259 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr); 3260 } 3261 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3262 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3263 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3264 3265 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3266 B_mpi->assembled = PETSC_FALSE; 3267 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3268 merge->bi = bi; 3269 merge->bj = bj; 3270 merge->buf_ri = buf_ri; 3271 merge->buf_rj = buf_rj; 3272 merge->coi = PETSC_NULL; 3273 merge->coj = PETSC_NULL; 3274 merge->owners_co = PETSC_NULL; 3275 3276 /* attach the supporting struct to B_mpi for reuse */ 3277 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3278 ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr); 3279 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3280 *mpimat = B_mpi; 3281 3282 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 3283 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3284 PetscFunctionReturn(0); 3285 } 3286 3287 static PetscEvent logkey_seqstompi = 0; 3288 #undef __FUNCT__ 3289 #define __FUNCT__ "MatMerge_SeqsToMPI" 3290 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3291 { 3292 PetscErrorCode ierr; 3293 3294 PetscFunctionBegin; 3295 if (!logkey_seqstompi) { 3296 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3297 } 3298 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3299 if (scall == MAT_INITIAL_MATRIX){ 3300 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3301 } 3302 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3303 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3304 PetscFunctionReturn(0); 3305 } 3306 static PetscEvent logkey_getlocalmat = 0; 3307 #undef __FUNCT__ 3308 #define __FUNCT__ "MatGetLocalMat" 3309 /*@C 3310 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3311 3312 Not Collective 3313 3314 Input Parameters: 3315 + A - the matrix 3316 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3317 3318 Output Parameter: 3319 . 
A_loc - the local sequential matrix generated 3320 3321 Level: developer 3322 3323 @*/ 3324 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3325 { 3326 PetscErrorCode ierr; 3327 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3328 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3329 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3330 PetscScalar *aa=a->a,*ba=b->a,*ca; 3331 PetscInt am=A->m,i,j,k,cstart=mpimat->cstart; 3332 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3333 3334 PetscFunctionBegin; 3335 if (!logkey_getlocalmat) { 3336 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3337 } 3338 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3339 if (scall == MAT_INITIAL_MATRIX){ 3340 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 3341 ci[0] = 0; 3342 for (i=0; i<am; i++){ 3343 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3344 } 3345 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3346 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 3347 k = 0; 3348 for (i=0; i<am; i++) { 3349 ncols_o = bi[i+1] - bi[i]; 3350 ncols_d = ai[i+1] - ai[i]; 3351 /* off-diagonal portion of A */ 3352 for (jo=0; jo<ncols_o; jo++) { 3353 col = cmap[*bj]; 3354 if (col >= cstart) break; 3355 cj[k] = col; bj++; 3356 ca[k++] = *ba++; 3357 } 3358 /* diagonal portion of A */ 3359 for (j=0; j<ncols_d; j++) { 3360 cj[k] = cstart + *aj++; 3361 ca[k++] = *aa++; 3362 } 3363 /* off-diagonal portion of A */ 3364 for (j=jo; j<ncols_o; j++) { 3365 cj[k] = cmap[*bj++]; 3366 ca[k++] = *ba++; 3367 } 3368 } 3369 /* put together the new matrix */ 3370 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 3371 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3372 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3373 mat = (Mat_SeqAIJ*)(*A_loc)->data; 3374 mat->freedata = PETSC_TRUE; 3375 mat->nonew = 0; 3376 } else if (scall == MAT_REUSE_MATRIX){ 3377 mat=(Mat_SeqAIJ*)(*A_loc)->data; 3378 ci = mat->i; cj = mat->j; ca = mat->a; 3379 for (i=0; i<am; i++) { 3380 /* off-diagonal portion of A */ 3381 ncols_o = bi[i+1] - bi[i]; 3382 for (jo=0; jo<ncols_o; jo++) { 3383 col = cmap[*bj]; 3384 if (col >= cstart) break; 3385 *ca++ = *ba++; bj++; 3386 } 3387 /* diagonal portion of A */ 3388 ncols_d = ai[i+1] - ai[i]; 3389 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 3390 /* off-diagonal portion of A */ 3391 for (j=jo; j<ncols_o; j++) { 3392 *ca++ = *ba++; bj++; 3393 } 3394 } 3395 } else { 3396 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3397 } 3398 3399 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3400 PetscFunctionReturn(0); 3401 } 3402 3403 static PetscEvent logkey_getlocalmatcondensed = 0; 3404 #undef __FUNCT__ 3405 #define __FUNCT__ "MatGetLocalMatCondensed" 3406 /*@C 3407 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 3408 3409 Not Collective 3410 3411 Input Parameters: 3412 + A - the matrix 3413 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3414 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 3415 3416 Output Parameter: 3417 . 
A_loc - the local sequential matrix generated 3418 3419 Level: developer 3420 3421 @*/ 3422 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 3423 { 3424 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3425 PetscErrorCode ierr; 3426 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 3427 IS isrowa,iscola; 3428 Mat *aloc; 3429 3430 PetscFunctionBegin; 3431 if (!logkey_getlocalmatcondensed) { 3432 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE); 3433 } 3434 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3435 if (!row){ 3436 start = a->rstart; end = a->rend; 3437 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 3438 } else { 3439 isrowa = *row; 3440 } 3441 if (!col){ 3442 start = a->cstart; 3443 cmap = a->garray; 3444 nzA = a->A->n; 3445 nzB = a->B->n; 3446 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3447 ncols = 0; 3448 for (i=0; i<nzB; i++) { 3449 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3450 else break; 3451 } 3452 imark = i; 3453 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 3454 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 3455 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 3456 ierr = PetscFree(idx);CHKERRQ(ierr); 3457 } else { 3458 iscola = *col; 3459 } 3460 if (scall != MAT_INITIAL_MATRIX){ 3461 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 3462 aloc[0] = *A_loc; 3463 } 3464 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 3465 *A_loc = aloc[0]; 3466 ierr = PetscFree(aloc);CHKERRQ(ierr); 3467 if (!row){ 3468 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 3469 } 3470 if (!col){ 3471 ierr = ISDestroy(iscola);CHKERRQ(ierr); 3472 } 3473 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3474 PetscFunctionReturn(0); 3475 } 3476 3477 static PetscEvent logkey_GetBrowsOfAcols = 0; 3478 #undef __FUNCT__ 3479 #define __FUNCT__ "MatGetBrowsOfAcols" 3480 /*@C 3481 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 3482 3483 Collective on Mat 3484 3485 Input Parameters: 3486 + A,B - the matrices in mpiaij format 3487 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3488 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 3489 3490 Output Parameter: 3491 + rowb, colb - index sets of rows and columns of B to extract 3492 . 
brstart - row index of B_seq from which next B->m rows are taken from B's local rows 3493 - B_seq - the sequential matrix generated 3494 3495 Level: developer 3496 3497 @*/ 3498 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 3499 { 3500 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3501 PetscErrorCode ierr; 3502 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 3503 IS isrowb,iscolb; 3504 Mat *bseq; 3505 3506 PetscFunctionBegin; 3507 if (a->cstart != b->rstart || a->cend != b->rend){ 3508 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend); 3509 } 3510 if (!logkey_GetBrowsOfAcols) { 3511 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 3512 } 3513 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3514 3515 if (scall == MAT_INITIAL_MATRIX){ 3516 start = a->cstart; 3517 cmap = a->garray; 3518 nzA = a->A->n; 3519 nzB = a->B->n; 3520 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3521 ncols = 0; 3522 for (i=0; i<nzB; i++) { /* row < local row index */ 3523 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3524 else break; 3525 } 3526 imark = i; 3527 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 3528 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 3529 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 3530 ierr = PetscFree(idx);CHKERRQ(ierr); 3531 *brstart = imark; 3532 ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr); 3533 } else { 3534 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 3535 isrowb = *rowb; iscolb = *colb; 3536 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 3537 bseq[0] = *B_seq; 3538 } 3539 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 3540 *B_seq = bseq[0]; 3541 ierr = PetscFree(bseq);CHKERRQ(ierr); 3542 if (!rowb){ 3543 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 3544 } else { 3545 *rowb = isrowb; 3546 } 3547 if (!colb){ 3548 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 3549 } else { 3550 *colb = iscolb; 3551 } 3552 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3553 PetscFunctionReturn(0); 3554 } 3555 3556 static PetscEvent logkey_GetBrowsOfAocols = 0; 3557 #undef __FUNCT__ 3558 #define __FUNCT__ "MatGetBrowsOfAoCols" 3559 /*@C 3560 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 3561 of the OFF-DIAGONAL portion of local A 3562 3563 Collective on Mat 3564 3565 Input Parameters: 3566 + A,B - the matrices in mpiaij format 3567 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3568 . 
startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 3569 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 3570 3571 Output Parameter: 3572 + B_oth - the sequential matrix generated 3573 3574 Level: developer 3575 3576 @*/ 3577 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 3578 { 3579 VecScatter_MPI_General *gen_to,*gen_from; 3580 PetscErrorCode ierr; 3581 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3582 Mat_SeqAIJ *b_oth; 3583 VecScatter ctx=a->Mvctx; 3584 MPI_Comm comm=ctx->comm; 3585 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank; 3586 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj; 3587 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 3588 PetscInt i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 3589 MPI_Request *rwaits,*swaits; 3590 MPI_Status *sstatus,rstatus; 3591 PetscInt *cols; 3592 PetscScalar *vals; 3593 PetscMPIInt j; 3594 3595 PetscFunctionBegin; 3596 if (a->cstart != b->rstart || a->cend != b->rend){ 3597 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend); 3598 } 3599 if (!logkey_GetBrowsOfAocols) { 3600 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE); 3601 } 3602 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3603 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3604 3605 gen_to = (VecScatter_MPI_General*)ctx->todata; 3606 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 3607 rvalues = gen_from->values; /* holds the length of sending row */ 3608 svalues = gen_to->values; /* holds the length of receiving row */ 3609 nrecvs = gen_from->n; 3610 nsends = gen_to->n; 3611 rwaits = gen_from->requests; 3612 swaits = gen_to->requests; 3613 srow = gen_to->indices; /* local row index to be sent */ 3614 rstarts = gen_from->starts; 3615 sstarts = gen_to->starts; 3616 rprocs = gen_from->procs; 3617 sprocs = gen_to->procs; 3618 sstatus = gen_to->sstatus; 3619 3620 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 3621 if (scall == MAT_INITIAL_MATRIX){ 3622 /* i-array */ 3623 /*---------*/ 3624 /* post receives */ 3625 for (i=0; i<nrecvs; i++){ 3626 rowlen = (PetscInt*)rvalues + rstarts[i]; 3627 nrows = rstarts[i+1]-rstarts[i]; 3628 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3629 } 3630 3631 /* pack the outgoing message */ 3632 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 3633 rstartsj = sstartsj + nsends +1; 3634 sstartsj[0] = 0; rstartsj[0] = 0; 3635 len = 0; /* total length of j or a array to be sent */ 3636 k = 0; 3637 for (i=0; i<nsends; i++){ 3638 rowlen = (PetscInt*)svalues + sstarts[i]; 3639 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3640 for (j=0; j<nrows; j++) { 3641 row = srow[k] + b->rowners[rank]; /* global row idx */ 3642 ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 3643 len += rowlen[j]; 3644 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 3645 k++; 3646 } 3647 ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3648 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 3649 } 3650 /* recvs and sends of i-array are completed */ 3651 i = nrecvs; 3652 while (i--) { 3653 ierr = 
MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3654 }
3655 if (nsends) {
3656 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3657 }
3658 /* allocate buffers for sending j and a arrays */
3659 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
3660 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
3661
3662 /* create i-array of B_oth */
3663 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
3664 b_othi[0] = 0;
3665 len = 0; /* total length of j or a array to be received */
3666 k = 0;
3667 for (i=0; i<nrecvs; i++){
3668 rowlen = (PetscInt*)rvalues + rstarts[i];
3669 nrows = rstarts[i+1]-rstarts[i];
3670 for (j=0; j<nrows; j++) {
3671 b_othi[k+1] = b_othi[k] + rowlen[j];
3672 len += rowlen[j]; k++;
3673 }
3674 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3675 }
3676
3677 /* allocate space for j and a arrays of B_oth */
3678 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
3679 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
3680
3681 /* j-array */
3682 /*---------*/
3683 /* post receives of j-array */
3684 for (i=0; i<nrecvs; i++){
3685 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3686 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3687 }
3688 k = 0;
3689 for (i=0; i<nsends; i++){
3690 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3691 bufJ = bufj+sstartsj[i];
3692 for (j=0; j<nrows; j++) {
3693 row = srow[k++] + b->rowners[rank]; /* global row idx */
3694 ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3695 for (l=0; l<ncols; l++){
3696 *bufJ++ = cols[l];
3697 }
3698 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3699 }
3700 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3701 }
3702
3703 /* recvs and sends of j-array are completed */
3704 i = nrecvs;
3705 while (i--) {
3706 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3707 }
3708 if (nsends) {
3709 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3710 }
3711 } else if (scall == MAT_REUSE_MATRIX){
3712 sstartsj = *startsj;
3713 rstartsj = sstartsj + nsends +1;
3714 bufa = *bufa_ptr;
3715 b_oth = (Mat_SeqAIJ*)(*B_oth)->data;
3716 b_otha = b_oth->a;
3717 } else {
3718 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3719 }
3720
3721 /* a-array */
3722 /*---------*/
3723 /* post receives of a-array */
3724 for (i=0; i<nrecvs; i++){
3725 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3726 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3727 }
3728 k = 0;
3729 for (i=0; i<nsends; i++){
3730 nrows = sstarts[i+1]-sstarts[i];
3731 bufA = bufa+sstartsj[i];
3732 for (j=0; j<nrows; j++) {
3733 row = srow[k++] + b->rowners[rank]; /* global row idx */
3734 ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3735 for (l=0; l<ncols; l++){
3736 *bufA++ = vals[l];
3737 }
3738 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3739
3740 }
3741 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3742 }
3743 /* recvs and sends of a-array are completed */
3744 i = nrecvs;
3745 while (i--) {
3746 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3747 }
3748 if (nsends) {
3749 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3750 }
3751
3752 if (scall == MAT_INITIAL_MATRIX){
3753 /* put together the new matrix */
3754 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
3755
3756 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3757 /* Since these are PETSc arrays, change flags to free them as necessary. */
3758 b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
3759 b_oth->freedata = PETSC_TRUE;
3760 b_oth->nonew = 0;
3761
3762 ierr = PetscFree(bufj);CHKERRQ(ierr);
3763 if (!startsj || !bufa_ptr){
3764 ierr = PetscFree(sstartsj);CHKERRQ(ierr);
3765 ierr = PetscFree(bufa);CHKERRQ(ierr);
3766 } else {
3767 *startsj = sstartsj;
3768 *bufa_ptr = bufa;
3769 }
3770 }
3771 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3772
3773 PetscFunctionReturn(0);
3774 }
3775
3776 /*MC
3777 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
3778
3779 Options Database Keys:
3780 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
3781
3782 Level: beginner
3783
3784 .seealso: MatCreateMPIAIJ
3785 M*/
3786
3787 EXTERN_C_BEGIN
3788 #undef __FUNCT__
3789 #define __FUNCT__ "MatCreate_MPIAIJ"
3790 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
3791 {
3792 Mat_MPIAIJ *b;
3793 PetscErrorCode ierr;
3794 PetscInt i;
3795 PetscMPIInt size;
3796
3797 PetscFunctionBegin;
3798 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
3799
3800 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
3801 B->data = (void*)b;
3802 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3803 B->factor = 0;
3804 B->bs = 1;
3805 B->assembled = PETSC_FALSE;
3806 B->mapping = 0;
3807
3808 B->insertmode = NOT_SET_VALUES;
3809 b->size = size;
3810 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
3811
3812 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
3813 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
3814
3815 /* the information in the maps duplicates the information computed below, eventually
3816 we should remove the duplicate information that is not contained in the maps */
3817 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
3818 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
3819
3820 /* build local table of row and column ownerships */
3821 ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
3822 ierr = PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
3823 b->cowners = b->rowners + b->size + 2;
3824 ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3825 b->rowners[0] = 0;
3826 for (i=2; i<=b->size; i++) {
3827 b->rowners[i] += b->rowners[i-1];
3828 }
3829 b->rstart = b->rowners[b->rank];
3830 b->rend = b->rowners[b->rank+1];
3831 ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3832 b->cowners[0] = 0;
3833 for (i=2; i<=b->size; i++) {
3834 b->cowners[i] += b->cowners[i-1];
3835 }
3836 b->cstart = b->cowners[b->rank];
3837 b->cend = b->cowners[b->rank+1];
3838
3839 /* build cache for off-processor entries formed during MatSetValues() */
3840 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
3841 b->donotstash = PETSC_FALSE;
3842 b->colmap = 0;
3843 b->garray = 0;
3844 b->roworiented = PETSC_TRUE;
3845
3846 /* stuff used for matrix vector multiply */
3847 b->lvec =
PETSC_NULL; 3848 b->Mvctx = PETSC_NULL; 3849 3850 /* stuff for MatGetRow() */ 3851 b->rowindices = 0; 3852 b->rowvalues = 0; 3853 b->getrowactive = PETSC_FALSE; 3854 3855 /* Explicitly create 2 MATSEQAIJ matrices. */ 3856 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 3857 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3858 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3859 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 3860 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3861 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3862 3863 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3864 "MatStoreValues_MPIAIJ", 3865 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 3866 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3867 "MatRetrieveValues_MPIAIJ", 3868 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 3869 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3870 "MatGetDiagonalBlock_MPIAIJ", 3871 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 3872 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3873 "MatIsTranspose_MPIAIJ", 3874 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 3875 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 3876 "MatMPIAIJSetPreallocation_MPIAIJ", 3877 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 3878 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 3879 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 3880 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 3881 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3882 "MatDiagonalScaleLocal_MPIAIJ", 3883 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 3884 PetscFunctionReturn(0); 3885 } 3886 EXTERN_C_END 3887
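
/*
   Usage sketch (illustrative only, not part of the library source): a typical
   calling sequence for MatMerge_SeqsToMPI() defined above.  The matrix names
   seqA and C are hypothetical.  A first call with MAT_INITIAL_MATRIX runs the
   symbolic phase (MatMerge_SeqsToMPISymbolic) and attaches the
   "MatMergeSeqsToMPI" container to the result; later calls with
   MAT_REUSE_MATRIX only rerun the numeric phase on the same nonzero pattern.

     Mat            seqA,C;
     PetscErrorCode ierr;
     ...
     ierr = MatMerge_SeqsToMPI(comm,seqA,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     ...   change the numerical values of seqA, keeping its nonzero pattern   ...
     ierr = MatMerge_SeqsToMPI(comm,seqA,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
*/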
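
/*
   Usage sketch (illustrative only) for MatGetBrowsOfAoCols() above; the
   variable names are hypothetical.  Passing the addresses of startsj and bufa
   keeps the communication buffers so that a later call may use
   MAT_REUSE_MATRIX; passing PETSC_NULL for either argument forces the
   MAT_INITIAL_MATRIX path on every call.

     Mat            A,B,B_oth = PETSC_NULL;
     PetscInt       *startsj  = PETSC_NULL;
     PetscScalar    *bufa     = PETSC_NULL;
     PetscErrorCode ierr;
     ...
     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
     ...   new numerical values in B, same parallel layout   ...
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
*/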
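
/*
   Usage sketch (illustrative only) for creating a MATMPIAIJ matrix through the
   constructor registered above; the sizes M, N and the preallocation numbers
   are hypothetical.

     Mat            A;
     PetscErrorCode ierr;
     ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);   (or select -mat_type mpiaij via MatSetFromOptions())
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/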