#define PETSCMAT_DLL

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/inline/spops.h"

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
   a slightly higher hash table cost; without it, it is not scalable (each processor
   has an order N integer array, but access is fast).
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (lastcol1 > col) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    if (nrow1 >= rmax1) { \
      /* there is no extra room in row, therefore enlarge */ \
      PetscInt    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      ierr = PetscMalloc3(new_nz,PetscScalar,&new_a,new_nz,PetscInt,&new_j,am+1,PetscInt,&new_i);CHKERRQ(ierr);\
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow1)*sizeof(PetscInt));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow1); \
      ierr = PetscMemcpy(new_j+ai[row]+nrow1+CHUNKSIZE,aj+ai[row]+nrow1,len*sizeof(PetscInt));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow1)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+nrow1+CHUNKSIZE,aa+ai[row]+nrow1,len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
      ierr = MatSeqXAIJFreeAIJ(a->singlemalloc,a->a,a->j,a->i);CHKERRQ(ierr);\
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp1   = aj + ai[row]; ap1 = aa + ai[row]; \
      rmax1 = aimax[row] = aimax[row] + CHUNKSIZE; \
      ierr  = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar)));CHKERRQ(ierr); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow1++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (lastcol2 > col) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    if (nrow2 >= rmax2) { \
      /* there is no extra room in row, therefore enlarge */ \
      PetscInt    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      ierr = PetscMalloc3(new_nz,PetscScalar,&new_a,new_nz,PetscInt,&new_j,bm+1,PetscInt,&new_i);CHKERRQ(ierr);\
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
      for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow2)*sizeof(PetscInt));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - bi[row] - nrow2); \
      ierr = PetscMemcpy(new_j+bi[row]+nrow2+CHUNKSIZE,bj+bi[row]+nrow2,len*sizeof(PetscInt));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow2)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bi[row]+nrow2+CHUNKSIZE,ba+bi[row]+nrow2,len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
      ierr = MatSeqXAIJFreeAIJ(b->singlemalloc,b->a,b->j,b->i);CHKERRQ(ierr);\
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = PETSC_TRUE; \
 \
      rp2   = bj + bi[row]; ap2 = ba + bi[row]; \
      rmax2 = bimax[row] = bimax[row] + CHUNKSIZE; \
      ierr  = PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar)));CHKERRQ(ierr); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow2++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
  PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A = aij->A;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)A->data;
  PetscInt    *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar *aa = a->a;
  PetscTruth  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat         B = aij->B;
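  /*
     Illustration (with made-up sizes): each process stores its rows in two
     SeqAIJ pieces.  A holds the "diagonal" columns cstart..cend-1 owned by
     this process, and B holds the remaining columns in compressed, locally
     renumbered form.  For an 8x8 matrix split 4/4 over two processes, rank 0
     has cstart=0 and cend=4: an entry set at global column 2 goes into A at
     local column 2, while an entry at global column 6 goes into B at the
     local position recorded via garray/colmap.  The pointers cached here let
     the insertion macros above update both pieces directly.
  */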
  Mat_SeqAIJ  *b = (Mat_SeqAIJ*)B->data;
  PetscInt    *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
  PetscScalar *ba = b->a;

  PetscInt    *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt    nonew = a->nonew;
  PetscScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
  PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
        if (idxn[j] >= mat->N)
SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1); 281 if (idxn[j] >= cstart && idxn[j] < cend){ 282 col = idxn[j] - cstart; 283 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 284 } else { 285 if (!aij->colmap) { 286 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 287 } 288 #if defined (PETSC_USE_CTABLE) 289 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 290 col --; 291 #else 292 col = aij->colmap[idxn[j]] - 1; 293 #endif 294 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 295 else { 296 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 297 } 298 } 299 } 300 } else { 301 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 302 } 303 } 304 PetscFunctionReturn(0); 305 } 306 307 #undef __FUNCT__ 308 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 309 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 310 { 311 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 312 PetscErrorCode ierr; 313 PetscInt nstash,reallocs; 314 InsertMode addv; 315 316 PetscFunctionBegin; 317 if (aij->donotstash) { 318 PetscFunctionReturn(0); 319 } 320 321 /* make sure all processors are either in INSERTMODE or ADDMODE */ 322 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 323 if (addv == (ADD_VALUES|INSERT_VALUES)) { 324 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 325 } 326 mat->insertmode = addv; /* in case this processor had no cache */ 327 328 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 329 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 330 ierr = PetscLogInfo((aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr); 331 PetscFunctionReturn(0); 332 } 333 334 #undef __FUNCT__ 335 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 336 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 337 { 338 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 339 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 340 PetscErrorCode ierr; 341 PetscMPIInt n; 342 PetscInt i,j,rstart,ncols,flg; 343 PetscInt *row,*col,other_disassembled; 344 PetscScalar *val; 345 InsertMode addv = mat->insertmode; 346 347 PetscFunctionBegin; 348 if (!aij->donotstash) { 349 while (1) { 350 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 351 if (!flg) break; 352 353 for (i=0; i<n;) { 354 /* Now identify the consecutive vals belonging to the same row */ 355 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 356 if (j < n) ncols = j-i; 357 else ncols = n-i; 358 /* Now assemble all these values with a single function call */ 359 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 360 i = j; 361 } 362 } 363 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 364 } 365 a->compressedrow.use = PETSC_FALSE; 366 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 367 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 368 369 /* determine if any processor has disassembled, if so we must 370 also disassemble ourselfs, in order that we may reassemble. 
  */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  /* reaccess the b because aij->B was changed in MatSetValues() or DisAssemble() */
  b = (Mat_SeqAIJ *)aij->B->data;

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
  b->compressedrow.use = PETSC_TRUE;
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }

  /* used by MatAXPY() */
  a->xtoy = 0; b->xtoy = 0;
  a->XtoY = 0; b->XtoY = 0;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
  PetscInt       i,N,*rows,*owners = l->rowners;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr =
      MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(A,istmp);CHKERRQ(ierr);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry, which would
     allocate new memory, thus causing lots of mallocs and slowing things down.

     Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line, */
    ierr =
      VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->M;
    header[2] = mat->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen = mat->m;
    ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr =
        MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
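    /* a null draw device means nothing will actually be rendered, so return
       here before doing the (potentially expensive) gather onto process 0 below */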
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    PetscInt    M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    if (!rank) {
      ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}



#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
PetscErrorCode
MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1;
  PetscScalar    mone=-1.0;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
      CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
      CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
      CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  info->columns_local     = (double)matin->N;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    ierr = PetscLogInfo((A,"MatSetOption_MPIAIJ:Option ignored\n"));CHKERRQ(ierr);
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  PetscInt
                 *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
       allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = aij->cstart;
  PetscReal      sum = 0.0;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->m; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat            B;
  PetscScalar    *array;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  }

  ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*)a->A->data;
  m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for (i=0; i<ai[m]; i++) {aj[i] += a->cstart;}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*)a->B->data;
  m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
  ct = cols;
  for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  ierr = PetscFree(ct);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr =
    MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatPrintHelp_MPIAIJ"
PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetBlockSize_MPIAIJ"
PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
  ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIAIJ"
PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
       and then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    x   = (Mat_SeqAIJ *)xx->A->data;
    y   = (Mat_SeqAIJ *)yy->A->data;
    bnz = (PetscBLASInt)x->nz;
    BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
    x   = (Mat_SeqAIJ *)xx->B->data;
    y   = (Mat_SeqAIJ *)yy->B->data;
    bnz = (PetscBLASInt)x->nz;
    BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
  } else if (str == SUBSET_NONZERO_PATTERN) {
    ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);

    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    if (y->xtoy && y->XtoY != xx->B) {
      ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
      ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
    }
    if (!y->xtoy) { /* get xtoy */
      ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
      y->XtoY = xx->B;
    }
    for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
  } else {
    ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
/* 4*/ MatMultAdd_MPIAIJ,
       MatMultTranspose_MPIAIJ,
       MatMultTransposeAdd_MPIAIJ,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
/*15*/ MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
/*20*/ MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
/*25*/ MatZeroRows_MPIAIJ,
       0,
       0,
       0,
       0,
/*30*/ MatSetUpPreallocation_MPIAIJ,
       0,
       0,
       0,
       0,
/*35*/ MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
/*40*/ MatAXPY_MPIAIJ,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
/*45*/ MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       0,
/*50*/ MatSetBlockSize_MPIAIJ,
0, 1682 0, 1683 0, 1684 0, 1685 /*55*/ MatFDColoringCreate_MPIAIJ, 1686 0, 1687 MatSetUnfactored_MPIAIJ, 1688 0, 1689 0, 1690 /*60*/ MatGetSubMatrix_MPIAIJ, 1691 MatDestroy_MPIAIJ, 1692 MatView_MPIAIJ, 1693 MatGetPetscMaps_Petsc, 1694 0, 1695 /*65*/ 0, 1696 0, 1697 0, 1698 0, 1699 0, 1700 /*70*/ 0, 1701 0, 1702 MatSetColoring_MPIAIJ, 1703 #if defined(PETSC_HAVE_ADIC) 1704 MatSetValuesAdic_MPIAIJ, 1705 #else 1706 0, 1707 #endif 1708 MatSetValuesAdifor_MPIAIJ, 1709 /*75*/ 0, 1710 0, 1711 0, 1712 0, 1713 0, 1714 /*80*/ 0, 1715 0, 1716 0, 1717 0, 1718 /*84*/ MatLoad_MPIAIJ, 1719 0, 1720 0, 1721 0, 1722 0, 1723 0, 1724 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1725 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1726 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1727 MatPtAP_Basic, 1728 MatPtAPSymbolic_MPIAIJ, 1729 /*95*/ MatPtAPNumeric_MPIAIJ, 1730 0, 1731 0, 1732 0, 1733 0, 1734 /*100*/0, 1735 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1736 MatPtAPNumeric_MPIAIJ_MPIAIJ, 1737 }; 1738 1739 /* ----------------------------------------------------------------------------------------*/ 1740 1741 EXTERN_C_BEGIN 1742 #undef __FUNCT__ 1743 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1744 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 1745 { 1746 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1747 PetscErrorCode ierr; 1748 1749 PetscFunctionBegin; 1750 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1751 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1752 PetscFunctionReturn(0); 1753 } 1754 EXTERN_C_END 1755 1756 EXTERN_C_BEGIN 1757 #undef __FUNCT__ 1758 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1759 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 1760 { 1761 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1762 PetscErrorCode ierr; 1763 1764 PetscFunctionBegin; 1765 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1766 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1767 PetscFunctionReturn(0); 1768 } 1769 EXTERN_C_END 1770 1771 #include "petscpc.h" 1772 EXTERN_C_BEGIN 1773 #undef __FUNCT__ 1774 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1775 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1776 { 1777 Mat_MPIAIJ *b; 1778 PetscErrorCode ierr; 1779 PetscInt i; 1780 1781 PetscFunctionBegin; 1782 B->preallocated = PETSC_TRUE; 1783 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1784 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1785 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1786 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1787 if (d_nnz) { 1788 for (i=0; i<B->m; i++) { 1789 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1790 } 1791 } 1792 if (o_nnz) { 1793 for (i=0; i<B->m; i++) { 1794 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1795 } 1796 } 1797 b = (Mat_MPIAIJ*)B->data; 1798 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1799 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1800 1801 PetscFunctionReturn(0); 1802 } 1803 EXTERN_C_END 1804 1805 #undef __FUNCT__ 1806 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1807 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1808 { 1809 Mat mat; 1810 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1811 PetscErrorCode ierr; 1812 1813 
PetscFunctionBegin; 1814 *newmat = 0; 1815 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1816 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1817 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1818 a = (Mat_MPIAIJ*)mat->data; 1819 1820 mat->factor = matin->factor; 1821 mat->bs = matin->bs; 1822 mat->assembled = PETSC_TRUE; 1823 mat->insertmode = NOT_SET_VALUES; 1824 mat->preallocated = PETSC_TRUE; 1825 1826 a->rstart = oldmat->rstart; 1827 a->rend = oldmat->rend; 1828 a->cstart = oldmat->cstart; 1829 a->cend = oldmat->cend; 1830 a->size = oldmat->size; 1831 a->rank = oldmat->rank; 1832 a->donotstash = oldmat->donotstash; 1833 a->roworiented = oldmat->roworiented; 1834 a->rowindices = 0; 1835 a->rowvalues = 0; 1836 a->getrowactive = PETSC_FALSE; 1837 1838 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 1839 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1840 if (oldmat->colmap) { 1841 #if defined (PETSC_USE_CTABLE) 1842 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1843 #else 1844 ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1845 ierr = PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1846 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1847 #endif 1848 } else a->colmap = 0; 1849 if (oldmat->garray) { 1850 PetscInt len; 1851 len = oldmat->B->n; 1852 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1853 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 1854 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 1855 } else a->garray = 0; 1856 1857 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1858 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 1859 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1860 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 1861 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1862 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1863 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1864 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1865 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1866 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 1867 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1868 *newmat = mat; 1869 PetscFunctionReturn(0); 1870 } 1871 1872 #include "petscsys.h" 1873 1874 #undef __FUNCT__ 1875 #define __FUNCT__ "MatLoad_MPIAIJ" 1876 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 1877 { 1878 Mat A; 1879 PetscScalar *vals,*svals; 1880 MPI_Comm comm = ((PetscObject)viewer)->comm; 1881 MPI_Status status; 1882 PetscErrorCode ierr; 1883 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 1884 PetscInt i,nz,j,rstart,rend,mmax; 1885 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 1886 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1887 PetscInt cend,cstart,n,*rowners; 1888 int fd; 1889 1890 PetscFunctionBegin; 1891 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1892 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1893 if (!rank) { 1894 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1895 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1896 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1897 } 1898 1899 ierr = 
MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1900 M = header[1]; N = header[2]; 1901 /* determine ownership of all rows */ 1902 m = M/size + ((M % size) > rank); 1903 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1904 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1905 1906 /* First process needs enough room for process with most rows */ 1907 if (!rank) { 1908 mmax = rowners[1]; 1909 for (i=2; i<size; i++) { 1910 mmax = PetscMax(mmax,rowners[i]); 1911 } 1912 } else mmax = m; 1913 1914 rowners[0] = 0; 1915 for (i=2; i<=size; i++) { 1916 mmax = PetscMax(mmax,rowners[i]); 1917 rowners[i] += rowners[i-1]; 1918 } 1919 rstart = rowners[rank]; 1920 rend = rowners[rank+1]; 1921 1922 /* distribute row lengths to all processors */ 1923 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 1924 if (!rank) { 1925 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 1926 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1927 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 1928 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1929 for (j=0; j<m; j++) { 1930 procsnz[0] += ourlens[j]; 1931 } 1932 for (i=1; i<size; i++) { 1933 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 1934 /* calculate the number of nonzeros on each processor */ 1935 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 1936 procsnz[i] += rowlengths[j]; 1937 } 1938 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1939 } 1940 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1941 } else { 1942 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1943 } 1944 1945 if (!rank) { 1946 /* determine max buffer needed and allocate it */ 1947 maxnz = 0; 1948 for (i=0; i<size; i++) { 1949 maxnz = PetscMax(maxnz,procsnz[i]); 1950 } 1951 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1952 1953 /* read in my part of the matrix column indices */ 1954 nz = procsnz[0]; 1955 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1956 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1957 1958 /* read in every one elses and ship off */ 1959 for (i=1; i<size; i++) { 1960 nz = procsnz[i]; 1961 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1962 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1963 } 1964 ierr = PetscFree(cols);CHKERRQ(ierr); 1965 } else { 1966 /* determine buffer space needed for message */ 1967 nz = 0; 1968 for (i=0; i<m; i++) { 1969 nz += ourlens[i]; 1970 } 1971 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1972 1973 /* receive message of column indices*/ 1974 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1975 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1976 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1977 } 1978 1979 /* determine column ownership if matrix is not square */ 1980 if (N != M) { 1981 n = N/size + ((N % size) > rank); 1982 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1983 cstart = cend - n; 1984 } else { 1985 cstart = rstart; 1986 cend = rend; 1987 n = cend - cstart; 1988 } 1989 1990 /* loop over local rows, determining number of off diagonal entries */ 1991 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 1992 jj = 0; 1993 for (i=0; i<m; i++) { 1994 for (j=0; j<ourlens[i]; j++) { 1995 if (mycols[jj] < cstart 
|| mycols[jj] >= cend) offlens[i]++; 1996 jj++; 1997 } 1998 } 1999 2000 /* create our matrix */ 2001 for (i=0; i<m; i++) { 2002 ourlens[i] -= offlens[i]; 2003 } 2004 ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr); 2005 ierr = MatSetType(A,type);CHKERRQ(ierr); 2006 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2007 2008 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2009 for (i=0; i<m; i++) { 2010 ourlens[i] += offlens[i]; 2011 } 2012 2013 if (!rank) { 2014 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2015 2016 /* read in my part of the matrix numerical values */ 2017 nz = procsnz[0]; 2018 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2019 2020 /* insert into matrix */ 2021 jj = rstart; 2022 smycols = mycols; 2023 svals = vals; 2024 for (i=0; i<m; i++) { 2025 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2026 smycols += ourlens[i]; 2027 svals += ourlens[i]; 2028 jj++; 2029 } 2030 2031 /* read in other processors and ship out */ 2032 for (i=1; i<size; i++) { 2033 nz = procsnz[i]; 2034 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2035 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2036 } 2037 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2038 } else { 2039 /* receive numeric values */ 2040 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2041 2042 /* receive message of values*/ 2043 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2044 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2045 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2046 2047 /* insert into matrix */ 2048 jj = rstart; 2049 smycols = mycols; 2050 svals = vals; 2051 for (i=0; i<m; i++) { 2052 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2053 smycols += ourlens[i]; 2054 svals += ourlens[i]; 2055 jj++; 2056 } 2057 } 2058 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2059 ierr = PetscFree(vals);CHKERRQ(ierr); 2060 ierr = PetscFree(mycols);CHKERRQ(ierr); 2061 ierr = PetscFree(rowners);CHKERRQ(ierr); 2062 2063 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2064 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2065 *newmat = A; 2066 PetscFunctionReturn(0); 2067 } 2068 2069 #undef __FUNCT__ 2070 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2071 /* 2072 Not great since it makes two copies of the submatrix, first an SeqAIJ 2073 in local and then by concatenating the local matrices the end result. 
2074 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2075 */ 2076 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2077 { 2078 PetscErrorCode ierr; 2079 PetscMPIInt rank,size; 2080 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2081 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2082 Mat *local,M,Mreuse; 2083 PetscScalar *vwork,*aa; 2084 MPI_Comm comm = mat->comm; 2085 Mat_SeqAIJ *aij; 2086 2087 2088 PetscFunctionBegin; 2089 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2090 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2091 2092 if (call == MAT_REUSE_MATRIX) { 2093 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2094 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2095 local = &Mreuse; 2096 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2097 } else { 2098 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2099 Mreuse = *local; 2100 ierr = PetscFree(local);CHKERRQ(ierr); 2101 } 2102 2103 /* 2104 m - number of local rows 2105 n - number of columns (same on all processors) 2106 rstart - first row in new global matrix generated 2107 */ 2108 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2109 if (call == MAT_INITIAL_MATRIX) { 2110 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2111 ii = aij->i; 2112 jj = aij->j; 2113 2114 /* 2115 Determine the number of non-zeros in the diagonal and off-diagonal 2116 portions of the matrix in order to do correct preallocation 2117 */ 2118 2119 /* first get start and end of "diagonal" columns */ 2120 if (csize == PETSC_DECIDE) { 2121 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2122 if (mglobal == n) { /* square matrix */ 2123 nlocal = m; 2124 } else { 2125 nlocal = n/size + ((n % size) > rank); 2126 } 2127 } else { 2128 nlocal = csize; 2129 } 2130 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2131 rstart = rend - nlocal; 2132 if (rank == size - 1 && rend != n) { 2133 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2134 } 2135 2136 /* next, compute all the lengths */ 2137 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2138 olens = dlens + m; 2139 for (i=0; i<m; i++) { 2140 jend = ii[i+1] - ii[i]; 2141 olen = 0; 2142 dlen = 0; 2143 for (j=0; j<jend; j++) { 2144 if (*jj < rstart || *jj >= rend) olen++; 2145 else dlen++; 2146 jj++; 2147 } 2148 olens[i] = olen; 2149 dlens[i] = dlen; 2150 } 2151 ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr); 2152 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2153 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2154 ierr = PetscFree(dlens);CHKERRQ(ierr); 2155 } else { 2156 PetscInt ml,nl; 2157 2158 M = *newmat; 2159 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2160 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2161 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2162 /* 2163 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2164 rather than the slower MatSetValues(). 
2165 */ 2166 M->was_assembled = PETSC_TRUE; 2167 M->assembled = PETSC_FALSE; 2168 } 2169 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2170 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2171 ii = aij->i; 2172 jj = aij->j; 2173 aa = aij->a; 2174 for (i=0; i<m; i++) { 2175 row = rstart + i; 2176 nz = ii[i+1] - ii[i]; 2177 cwork = jj; jj += nz; 2178 vwork = aa; aa += nz; 2179 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2180 } 2181 2182 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2183 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2184 *newmat = M; 2185 2186 /* save submatrix used in processor for next request */ 2187 if (call == MAT_INITIAL_MATRIX) { 2188 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2189 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2190 } 2191 2192 PetscFunctionReturn(0); 2193 } 2194 2195 EXTERN_C_BEGIN 2196 #undef __FUNCT__ 2197 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2198 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2199 { 2200 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2201 PetscInt m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d; 2202 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii; 2203 const PetscInt *JJ; 2204 PetscScalar *values; 2205 PetscErrorCode ierr; 2206 2207 PetscFunctionBegin; 2208 #if defined(PETSC_OPT_g) 2209 if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]); 2210 #endif 2211 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2212 o_nnz = d_nnz + m; 2213 2214 for (i=0; i<m; i++) { 2215 nnz = I[i+1]- I[i]; 2216 JJ = J + I[i]; 2217 nnz_max = PetscMax(nnz_max,nnz); 2218 #if defined(PETSC_OPT_g) 2219 if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 2220 #endif 2221 for (j=0; j<nnz; j++) { 2222 if (*JJ >= cstart) break; 2223 JJ++; 2224 } 2225 d = 0; 2226 for (; j<nnz; j++) { 2227 if (*JJ++ >= cend) break; 2228 d++; 2229 } 2230 d_nnz[i] = d; 2231 o_nnz[i] = nnz - d; 2232 } 2233 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2234 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2235 2236 if (v) values = (PetscScalar*)v; 2237 else { 2238 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2239 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2240 } 2241 2242 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2243 for (i=0; i<m; i++) { 2244 ii = i + rstart; 2245 nnz = I[i+1]- I[i]; 2246 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr); 2247 } 2248 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2249 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2250 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2251 2252 if (!v) { 2253 ierr = PetscFree(values);CHKERRQ(ierr); 2254 } 2255 PetscFunctionReturn(0); 2256 } 2257 EXTERN_C_END 2258 2259 #undef __FUNCT__ 2260 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2261 /*@C 2262 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2263 (the default parallel PETSc format). 2264 2265 Collective on MPI_Comm 2266 2267 Input Parameters: 2268 + A - the matrix 2269 . i - the indices into j for the start of each local row (starts with zero) 2270 . 
j - the column indices for each local row (starts with zero) these must be sorted for each row 2271 - v - optional values in the matrix 2272 2273 Level: developer 2274 2275 .keywords: matrix, aij, compressed row, sparse, parallel 2276 2277 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2278 @*/ 2279 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2280 { 2281 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2282 2283 PetscFunctionBegin; 2284 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2285 if (f) { 2286 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2287 } 2288 PetscFunctionReturn(0); 2289 } 2290 2291 #undef __FUNCT__ 2292 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2293 /*@C 2294 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2295 (the default parallel PETSc format). For good matrix assembly performance 2296 the user should preallocate the matrix storage by setting the parameters 2297 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2298 performance can be increased by more than a factor of 50. 2299 2300 Collective on MPI_Comm 2301 2302 Input Parameters: 2303 + A - the matrix 2304 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2305 (same value is used for all local rows) 2306 . d_nnz - array containing the number of nonzeros in the various rows of the 2307 DIAGONAL portion of the local submatrix (possibly different for each row) 2308 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2309 The size of this array is equal to the number of local rows, i.e 'm'. 2310 You must leave room for the diagonal entry even if it is zero. 2311 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2312 submatrix (same value is used for all local rows). 2313 - o_nnz - array containing the number of nonzeros in the various rows of the 2314 OFF-DIAGONAL portion of the local submatrix (possibly different for 2315 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2316 structure. The size of this array is equal to the number 2317 of local rows, i.e 'm'. 2318 2319 If the *_nnz parameter is given then the *_nz parameter is ignored 2320 2321 The AIJ format (also called the Yale sparse matrix format or 2322 compressed row storage (CSR)), is fully compatible with standard Fortran 77 2323 storage. The stored row and column indices begin with zero. See the users manual for details. 2324 2325 The parallel matrix is partitioned such that the first m0 rows belong to 2326 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2327 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2328 2329 The DIAGONAL portion of the local submatrix of a processor can be defined 2330 as the submatrix which is obtained by extraction the part corresponding 2331 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2332 first row that belongs to the processor, and r2 is the last row belonging 2333 to the this processor. This is a square mxm matrix. The remaining portion 2334 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2335 2336 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 
2337 2338 Example usage: 2339 2340 Consider the following 8x8 matrix with 34 non-zero values, that is 2341 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2342 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 2343 as follows: 2344 2345 .vb 2346 1 2 0 | 0 3 0 | 0 4 2347 Proc0 0 5 6 | 7 0 0 | 8 0 2348 9 0 10 | 11 0 0 | 12 0 2349 ------------------------------------- 2350 13 0 14 | 15 16 17 | 0 0 2351 Proc1 0 18 0 | 19 20 21 | 0 0 2352 0 0 0 | 22 23 0 | 24 0 2353 ------------------------------------- 2354 Proc2 25 26 27 | 0 0 28 | 29 0 2355 30 0 0 | 31 32 33 | 0 34 2356 .ve 2357 2358 This can be represented as a collection of submatrices as: 2359 2360 .vb 2361 A B C 2362 D E F 2363 G H I 2364 .ve 2365 2366 where the submatrices A,B,C are owned by proc0, D,E,F are 2367 owned by proc1, and G,H,I are owned by proc2. 2368 2369 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2370 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2371 The 'M','N' parameters are 8,8, and have the same values on all procs. 2372 2373 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2374 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2375 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2376 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 2377 part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ 2378 matrix, and [DF] as another SeqAIJ matrix. 2379 2380 When the d_nz and o_nz parameters are specified, d_nz storage elements are 2381 allocated for every row of the local diagonal submatrix, and o_nz 2382 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2383 One way to choose d_nz and o_nz is to use the maximum number of nonzeros in any 2384 local row of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively. 2385 In this case, the values of d_nz,o_nz are: 2386 .vb 2387 proc0 : d_nz = 2, o_nz = 2 2388 proc1 : d_nz = 3, o_nz = 2 2389 proc2 : d_nz = 1, o_nz = 4 2390 .ve 2391 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2392 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 2393 for proc2, i.e. we are using 12+15+10=37 storage locations to store 2394 34 values. 2395 2396 When the d_nnz and o_nnz parameters are specified, the storage is specified 2397 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 2398 In the above case the values for d_nnz,o_nnz are: 2399 .vb 2400 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2401 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2402 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2403 .ve 2404 Here the space allocated is the sum of all the above values, i.e. 34, and 2405 hence the preallocation is perfect.
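   As an illustrative sketch only (the communicator name 'comm' and the surrounding error
   handling are assumed, not taken from an existing example), the proc1 settings above
   could be supplied as:
.vb
      Mat      A;
      PetscInt d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};   /* per-row counts for proc1 above */

      ierr = MatCreate(comm,3,3,8,8,&A);CHKERRQ(ierr);   /* 3 local rows/columns, 8x8 global */
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve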
2406 2407 Level: intermediate 2408 2409 .keywords: matrix, aij, compressed row, sparse, parallel 2410 2411 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 2412 MPIAIJ 2413 @*/ 2414 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2415 { 2416 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2417 2418 PetscFunctionBegin; 2419 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2420 if (f) { 2421 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2422 } 2423 PetscFunctionReturn(0); 2424 } 2425 2426 #undef __FUNCT__ 2427 #define __FUNCT__ "MatCreateMPIAIJ" 2428 /*@C 2429 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2430 (the default parallel PETSc format). For good matrix assembly performance 2431 the user should preallocate the matrix storage by setting the parameters 2432 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2433 performance can be increased by more than a factor of 50. 2434 2435 Collective on MPI_Comm 2436 2437 Input Parameters: 2438 + comm - MPI communicator 2439 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2440 This value should be the same as the local size used in creating the 2441 y vector for the matrix-vector product y = Ax. 2442 . n - This value should be the same as the local size used in creating the 2443 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2444 calculated if N is given) For square matrices n is almost always m. 2445 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2446 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2447 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2448 (same value is used for all local rows) 2449 . d_nnz - array containing the number of nonzeros in the various rows of the 2450 DIAGONAL portion of the local submatrix (possibly different for each row) 2451 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2452 The size of this array is equal to the number of local rows, i.e 'm'. 2453 You must leave room for the diagonal entry even if it is zero. 2454 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2455 submatrix (same value is used for all local rows). 2456 - o_nnz - array containing the number of nonzeros in the various rows of the 2457 OFF-DIAGONAL portion of the local submatrix (possibly different for 2458 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2459 structure. The size of this array is equal to the number 2460 of local rows, i.e 'm'. 2461 2462 Output Parameter: 2463 . A - the matrix 2464 2465 Notes: 2466 If the *_nnz parameter is given then the *_nz parameter is ignored 2467 2468 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2469 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2470 storage requirements for this matrix. 2471 2472 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2473 processor than it must be used on all processors that share the object for 2474 that argument. 2475 2476 The user MUST specify either the local or global matrix dimensions 2477 (possibly both). 
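   As an illustrative sketch (the names comm, M, N, and A are assumed), a matrix with roughly
   5 nonzeros per row in the diagonal block and 2 per row in the off-diagonal block could be
   created with:
.vb
      ierr = MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
.ve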
2478 2479 The parallel matrix is partitioned such that the first m0 rows belong to 2480 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2481 to process 2, etc., where m0,m1,m2,... are the values of the input parameter 'm'. 2482 2483 The DIAGONAL portion of the local submatrix of a processor can be defined 2484 as the submatrix which is obtained by extracting the part corresponding 2485 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2486 first row that belongs to the processor, and r2 is the last row belonging 2487 to this processor. This is a square mxm matrix. The remaining portion 2488 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 2489 2490 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 2491 2492 When calling this routine with a single process communicator, a matrix of 2493 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2494 type of communicator, use the construction mechanism: 2495 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...); 2496 2497 By default, this format uses inodes (identical nodes) when possible. 2498 We search for consecutive rows with the same nonzero structure, thereby 2499 reusing matrix information to achieve increased efficiency. 2500 2501 Options Database Keys: 2502 + -mat_no_inode - Do not use inodes 2503 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 2504 - -mat_aij_oneindex - Internally use indexing starting at 1 2505 rather than 0. Note that when calling MatSetValues(), 2506 the user still MUST index entries starting at 0! 2507 2508 2509 Example usage: 2510 2511 Consider the following 8x8 matrix with 34 non-zero values, that is 2512 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2513 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 2514 as follows: 2515 2516 .vb 2517 1 2 0 | 0 3 0 | 0 4 2518 Proc0 0 5 6 | 7 0 0 | 8 0 2519 9 0 10 | 11 0 0 | 12 0 2520 ------------------------------------- 2521 13 0 14 | 15 16 17 | 0 0 2522 Proc1 0 18 0 | 19 20 21 | 0 0 2523 0 0 0 | 22 23 0 | 24 0 2524 ------------------------------------- 2525 Proc2 25 26 27 | 0 0 28 | 29 0 2526 30 0 0 | 31 32 33 | 0 34 2527 .ve 2528 2529 This can be represented as a collection of submatrices as: 2530 2531 .vb 2532 A B C 2533 D E F 2534 G H I 2535 .ve 2536 2537 where the submatrices A,B,C are owned by proc0, D,E,F are 2538 owned by proc1, and G,H,I are owned by proc2. 2539 2540 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2541 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2542 The 'M','N' parameters are 8,8, and have the same values on all procs. 2543 2544 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2545 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2546 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2547 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 2548 part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ 2549 matrix, and [DF] as another SeqAIJ matrix. 2550 2551 When the d_nz and o_nz parameters are specified, d_nz storage elements are 2552 allocated for every row of the local diagonal submatrix, and o_nz 2553 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2554 One way to choose d_nz and o_nz is to use the maximum number of nonzeros in any 2555 local row of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
2556 In this case, the values of d_nz,o_nz are: 2557 .vb 2558 proc0 : dnz = 2, o_nz = 2 2559 proc1 : dnz = 3, o_nz = 2 2560 proc2 : dnz = 1, o_nz = 4 2561 .ve 2562 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2563 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2564 for proc3. i.e we are using 12+15+10=37 storage locations to store 2565 34 values. 2566 2567 When d_nnz, o_nnz parameters are specified, the storage is specified 2568 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2569 In the above case the values for d_nnz,o_nnz are: 2570 .vb 2571 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2572 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2573 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2574 .ve 2575 Here the space allocated is sum of all the above values i.e 34, and 2576 hence pre-allocation is perfect. 2577 2578 Level: intermediate 2579 2580 .keywords: matrix, aij, compressed row, sparse, parallel 2581 2582 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2583 MPIAIJ 2584 @*/ 2585 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2586 { 2587 PetscErrorCode ierr; 2588 PetscMPIInt size; 2589 2590 PetscFunctionBegin; 2591 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2592 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2593 if (size > 1) { 2594 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2595 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2596 } else { 2597 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2598 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2599 } 2600 PetscFunctionReturn(0); 2601 } 2602 2603 #undef __FUNCT__ 2604 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2605 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2606 { 2607 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2608 2609 PetscFunctionBegin; 2610 *Ad = a->A; 2611 *Ao = a->B; 2612 *colmap = a->garray; 2613 PetscFunctionReturn(0); 2614 } 2615 2616 #undef __FUNCT__ 2617 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2618 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2619 { 2620 PetscErrorCode ierr; 2621 PetscInt i; 2622 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2623 2624 PetscFunctionBegin; 2625 if (coloring->ctype == IS_COLORING_LOCAL) { 2626 ISColoringValue *allcolors,*colors; 2627 ISColoring ocoloring; 2628 2629 /* set coloring for diagonal portion */ 2630 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2631 2632 /* set coloring for off-diagonal portion */ 2633 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2634 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2635 for (i=0; i<a->B->n; i++) { 2636 colors[i] = allcolors[a->garray[i]]; 2637 } 2638 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2639 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2640 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2641 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2642 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2643 ISColoringValue *colors; 2644 PetscInt *larray; 2645 ISColoring ocoloring; 2646 2647 /* set coloring for diagonal portion */ 2648 ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2649 for (i=0; 
i<a->A->n; i++) { 2650 larray[i] = i + a->cstart; 2651 } 2652 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2653 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2654 for (i=0; i<a->A->n; i++) { 2655 colors[i] = coloring->colors[larray[i]]; 2656 } 2657 ierr = PetscFree(larray);CHKERRQ(ierr); 2658 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2659 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2660 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2661 2662 /* set coloring for off-diagonal portion */ 2663 ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2664 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2665 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2666 for (i=0; i<a->B->n; i++) { 2667 colors[i] = coloring->colors[larray[i]]; 2668 } 2669 ierr = PetscFree(larray);CHKERRQ(ierr); 2670 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2671 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2672 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2673 } else { 2674 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 2675 } 2676 2677 PetscFunctionReturn(0); 2678 } 2679 2680 #if defined(PETSC_HAVE_ADIC) 2681 #undef __FUNCT__ 2682 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2683 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2684 { 2685 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2686 PetscErrorCode ierr; 2687 2688 PetscFunctionBegin; 2689 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2690 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2691 PetscFunctionReturn(0); 2692 } 2693 #endif 2694 2695 #undef __FUNCT__ 2696 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2697 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 2698 { 2699 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2700 PetscErrorCode ierr; 2701 2702 PetscFunctionBegin; 2703 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2704 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2705 PetscFunctionReturn(0); 2706 } 2707 2708 #undef __FUNCT__ 2709 #define __FUNCT__ "MatMerge" 2710 /*@C 2711 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2712 matrices from each processor 2713 2714 Collective on MPI_Comm 2715 2716 Input Parameters: 2717 + comm - the communicators the parallel matrix will live on 2718 . inmat - the input sequential matrices 2719 . n - number of local columns (or PETSC_DECIDE) 2720 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2721 2722 Output Parameter: 2723 . outmat - the parallel matrix generated 2724 2725 Level: advanced 2726 2727 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
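   A minimal usage sketch (the names comm, seqA, and parA are assumed); note that, as implemented
   below, the input sequential matrix is destroyed by this routine:
.vb
      Mat parA;   /* seqA is the local SeqAIJ matrix owned by each process */
      ierr = MatMerge(comm,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&parA);CHKERRQ(ierr);
.ve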
2728 2729 @*/ 2730 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2731 { 2732 PetscErrorCode ierr; 2733 PetscInt m,N,i,rstart,nnz,I,*dnz,*onz; 2734 PetscInt *indx; 2735 PetscScalar *values; 2736 PetscMap columnmap,rowmap; 2737 2738 PetscFunctionBegin; 2739 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2740 /* 2741 PetscMPIInt rank; 2742 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2743 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N); 2744 */ 2745 if (scall == MAT_INITIAL_MATRIX){ 2746 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2747 if (n == PETSC_DECIDE){ 2748 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2749 ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr); 2750 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2751 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2752 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2753 } 2754 2755 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2756 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2757 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2758 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2759 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2760 2761 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2762 for (i=0;i<m;i++) { 2763 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2764 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2765 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2766 } 2767 /* This routine will ONLY return MPIAIJ type matrix */ 2768 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr); 2769 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2770 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2771 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2772 2773 } else if (scall == MAT_REUSE_MATRIX){ 2774 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2775 } else { 2776 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 2777 } 2778 2779 for (i=0;i<m;i++) { 2780 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2781 I = i + rstart; 2782 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2783 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2784 } 2785 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2786 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2787 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2788 2789 PetscFunctionReturn(0); 2790 } 2791 2792 #undef __FUNCT__ 2793 #define __FUNCT__ "MatFileSplit" 2794 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2795 { 2796 PetscErrorCode ierr; 2797 PetscMPIInt rank; 2798 PetscInt m,N,i,rstart,nnz; 2799 size_t len; 2800 const PetscInt *indx; 2801 PetscViewer out; 2802 char *name; 2803 Mat B; 2804 const PetscScalar *values; 2805 2806 PetscFunctionBegin; 2807 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2808 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2809 /* Should this be the type of the diagonal block of A? 
*/ 2810 ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr); 2811 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2812 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2813 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2814 for (i=0;i<m;i++) { 2815 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2816 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2817 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2818 } 2819 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2820 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2821 2822 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2823 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2824 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2825 sprintf(name,"%s.%d",outfile,rank); 2826 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr); 2827 ierr = PetscFree(name); 2828 ierr = MatView(B,out);CHKERRQ(ierr); 2829 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2830 ierr = MatDestroy(B);CHKERRQ(ierr); 2831 PetscFunctionReturn(0); 2832 } 2833 2834 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 2835 #undef __FUNCT__ 2836 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 2837 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 2838 { 2839 PetscErrorCode ierr; 2840 Mat_Merge_SeqsToMPI *merge; 2841 PetscObjectContainer container; 2842 2843 PetscFunctionBegin; 2844 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2845 if (container) { 2846 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2847 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 2848 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 2849 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 2850 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 2851 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 2852 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 2853 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 2854 ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr); 2855 if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);} 2856 if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);} 2857 if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);} 2858 2859 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 2860 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 2861 } 2862 ierr = PetscFree(merge);CHKERRQ(ierr); 2863 2864 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 2865 PetscFunctionReturn(0); 2866 } 2867 2868 #include "src/mat/utils/freespace.h" 2869 #include "petscbt.h" 2870 static PetscEvent logkey_seqstompinum = 0; 2871 #undef __FUNCT__ 2872 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 2873 /*@C 2874 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 2875 matrices from each processor 2876 2877 Collective on MPI_Comm 2878 2879 Input Parameters: 2880 + comm - the communicators the parallel matrix will live on 2881 . seqmat - the input sequential matrices 2882 . m - number of local rows (or PETSC_DECIDE) 2883 . n - number of local columns (or PETSC_DECIDE) 2884 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2885 2886 Output Parameter: 2887 . mpimat - the parallel matrix generated 2888 2889 Level: advanced 2890 2891 Notes: 2892 The dimensions of the sequential matrix in each processor MUST be the same. 
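   The intended usage is the split symbolic/numeric sequence sketched below (argument names are
   assumed; the symbolic routine is defined later in this file):
.vb
      ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&mpimat);CHKERRQ(ierr);
      ierr = MatMerge_SeqsToMPINumeric(seqmat,mpimat);CHKERRQ(ierr);
.ve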
2893 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 2894 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 2895 @*/ 2896 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 2897 { 2898 PetscErrorCode ierr; 2899 MPI_Comm comm=mpimat->comm; 2900 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2901 PetscMPIInt size,rank,taga,*len_s; 2902 PetscInt N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j; 2903 PetscInt proc,m; 2904 PetscInt **buf_ri,**buf_rj; 2905 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 2906 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 2907 MPI_Request *s_waits,*r_waits; 2908 MPI_Status *status; 2909 MatScalar *aa=a->a,**abuf_r,*ba_i; 2910 Mat_Merge_SeqsToMPI *merge; 2911 PetscObjectContainer container; 2912 2913 PetscFunctionBegin; 2914 if (!logkey_seqstompinum) { 2915 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 2916 } 2917 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 2918 2919 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2920 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2921 2922 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2923 if (container) { 2924 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2925 } 2926 bi = merge->bi; 2927 bj = merge->bj; 2928 buf_ri = merge->buf_ri; 2929 buf_rj = merge->buf_rj; 2930 2931 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 2932 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 2933 len_s = merge->len_s; 2934 2935 /* send and recv matrix values */ 2936 /*-----------------------------*/ 2937 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 2938 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 2939 2940 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 2941 for (proc=0,k=0; proc<size; proc++){ 2942 if (!len_s[proc]) continue; 2943 i = owners[proc]; 2944 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 2945 k++; 2946 } 2947 2948 ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr); 2949 ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr); 2950 ierr = PetscFree(status);CHKERRQ(ierr); 2951 2952 ierr = PetscFree(s_waits);CHKERRQ(ierr); 2953 ierr = PetscFree(r_waits);CHKERRQ(ierr); 2954 2955 /* insert mat values of mpimat */ 2956 /*----------------------------*/ 2957 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 2958 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 2959 nextrow = buf_ri_k + merge->nrecv; 2960 nextai = nextrow + merge->nrecv; 2961 2962 for (k=0; k<merge->nrecv; k++){ 2963 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 2964 nrows = *(buf_ri_k[k]); 2965 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 2966 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 2967 } 2968 2969 /* set values of ba */ 2970 ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); 2971 for (i=0; i<m; i++) { 2972 arow = owners[rank] + i; 2973 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 2974 bnzi = bi[i+1] - bi[i]; 2975 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 2976 2977 /* add local non-zero vals of 
this proc's seqmat into ba */ 2978 anzi = ai[arow+1] - ai[arow]; 2979 aj = a->j + ai[arow]; 2980 aa = a->a + ai[arow]; 2981 nextaj = 0; 2982 for (j=0; nextaj<anzi; j++){ 2983 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 2984 ba_i[j] += aa[nextaj++]; 2985 } 2986 } 2987 2988 /* add received vals into ba */ 2989 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 2990 /* i-th row */ 2991 if (i == *nextrow[k]) { 2992 anzi = *(nextai[k]+1) - *nextai[k]; 2993 aj = buf_rj[k] + *(nextai[k]); 2994 aa = abuf_r[k] + *(nextai[k]); 2995 nextaj = 0; 2996 for (j=0; nextaj<anzi; j++){ 2997 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 2998 ba_i[j] += aa[nextaj++]; 2999 } 3000 } 3001 nextrow[k]++; nextai[k]++; 3002 } 3003 } 3004 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3005 } 3006 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3007 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3008 3009 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3010 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3011 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3012 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3013 PetscFunctionReturn(0); 3014 } 3015 3016 static PetscEvent logkey_seqstompisym = 0; 3017 #undef __FUNCT__ 3018 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 3019 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3020 { 3021 PetscErrorCode ierr; 3022 Mat B_mpi; 3023 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3024 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3025 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3026 PetscInt M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j; 3027 PetscInt len,proc,*dnz,*onz; 3028 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3029 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3030 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3031 MPI_Status *status; 3032 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3033 PetscBT lnkbt; 3034 Mat_Merge_SeqsToMPI *merge; 3035 PetscObjectContainer container; 3036 3037 PetscFunctionBegin; 3038 if (!logkey_seqstompisym) { 3039 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3040 } 3041 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3042 3043 /* make sure it is a PETSc comm */ 3044 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 3045 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3046 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3047 3048 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3049 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3050 3051 /* determine row ownership */ 3052 /*---------------------------------------------------------*/ 3053 ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr); 3054 if (m == PETSC_DECIDE) { 3055 ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr); 3056 } else { 3057 ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 3058 } 3059 ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr); 3060 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3061 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3062 3063 if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); } 3064 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 3065 3066 /* determine the number of messages to send, their 
lengths */ 3067 /*---------------------------------------------------------*/ 3068 len_s = merge->len_s; 3069 3070 len = 0; /* length of buf_si[] */ 3071 merge->nsend = 0; 3072 for (proc=0; proc<size; proc++){ 3073 len_si[proc] = 0; 3074 if (proc == rank){ 3075 len_s[proc] = 0; 3076 } else { 3077 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3078 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* number of nonzeros to be sent to [proc] */ 3079 } 3080 if (len_s[proc]) { 3081 merge->nsend++; 3082 nrows = 0; 3083 for (i=owners[proc]; i<owners[proc+1]; i++){ 3084 if (ai[i+1] > ai[i]) nrows++; 3085 } 3086 len_si[proc] = 2*(nrows+1); 3087 len += len_si[proc]; 3088 } 3089 } 3090 3091 /* determine the number and length of messages to receive for ij-structure */ 3092 /*-------------------------------------------------------------------------*/ 3093 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3094 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3095 3096 /* post the Irecv of j-structure */ 3097 /*-------------------------------*/ 3098 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr); 3099 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3100 3101 /* post the Isend of j-structure */ 3102 /*--------------------------------*/ 3103 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3104 sj_waits = si_waits + merge->nsend; 3105 3106 for (proc=0, k=0; proc<size; proc++){ 3107 if (!len_s[proc]) continue; 3108 i = owners[proc]; 3109 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3110 k++; 3111 } 3112 3113 /* receives and sends of j-structure are complete */ 3114 /*------------------------------------------------*/ 3115 ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr); 3116 ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr); 3117 3118 /* send and recv i-structure */ 3119 /*---------------------------*/ 3120 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr); 3121 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3122 3123 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3124 buf_si = buf_s; /* points to the beginning of the k-th msg to be sent */ 3125 for (proc=0,k=0; proc<size; proc++){ 3126 if (!len_s[proc]) continue; 3127 /* form outgoing message for i-structure: 3128 buf_si[0]: nrows to be sent 3129 [1:nrows]: row index (local to the receiving process) 3130 [nrows+1:2*nrows+1]: i-structure index 3131 */ 3132 /*-------------------------------------------*/ 3133 nrows = len_si[proc]/2 - 1; 3134 buf_si_i = buf_si + nrows+1; 3135 buf_si[0] = nrows; 3136 buf_si_i[0] = 0; 3137 nrows = 0; 3138 for (i=owners[proc]; i<owners[proc+1]; i++){ 3139 anzi = ai[i+1] - ai[i]; 3140 if (anzi) { 3141 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3142 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3143 nrows++; 3144 } 3145 } 3146 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3147 k++; 3148 buf_si += len_si[proc]; 3149 } 3150 3151 ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr); 3152 ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr); 3153 3154 ierr = PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv:
%D\n",merge->nsend,merge->nrecv));CHKERRQ(ierr); 3155 for (i=0; i<merge->nrecv; i++){ 3156 ierr = PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI: recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));CHKERRQ(ierr); 3157 } 3158 3159 ierr = PetscFree(len_si);CHKERRQ(ierr); 3160 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3161 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 3162 ierr = PetscFree(si_waits);CHKERRQ(ierr); 3163 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3164 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3165 ierr = PetscFree(status);CHKERRQ(ierr); 3166 3167 /* compute a local seq matrix in each processor */ 3168 /*----------------------------------------------*/ 3169 /* allocate bi array and free space for accumulating nonzero column info */ 3170 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3171 bi[0] = 0; 3172 3173 /* create and initialize a linked list */ 3174 nlnk = N+1; 3175 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3176 3177 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3178 len = 0; 3179 len = ai[owners[rank+1]] - ai[owners[rank]]; 3180 ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3181 current_space = free_space; 3182 3183 /* determine symbolic info for each local row */ 3184 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3185 nextrow = buf_ri_k + merge->nrecv; 3186 nextai = nextrow + merge->nrecv; 3187 for (k=0; k<merge->nrecv; k++){ 3188 buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */ 3189 nrows = *buf_ri_k[k]; 3190 nextrow[k] = buf_ri_k[k] + 1; /* next row number of the k-th received i-structure */ 3191 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the i-structure entries of the k-th received message */ 3192 } 3193 3194 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3195 len = 0; 3196 for (i=0;i<m;i++) { 3197 bnzi = 0; 3198 /* add local non-zero cols of this proc's seqmat into lnk */ 3199 arow = owners[rank] + i; 3200 anzi = ai[arow+1] - ai[arow]; 3201 aj = a->j + ai[arow]; 3202 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3203 bnzi += nlnk; 3204 /* add received col data into lnk */ 3205 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3206 if (i == *nextrow[k]) { /* i-th row */ 3207 anzi = *(nextai[k]+1) - *nextai[k]; 3208 aj = buf_rj[k] + *nextai[k]; 3209 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3210 bnzi += nlnk; 3211 nextrow[k]++; nextai[k]++; 3212 } 3213 } 3214 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3215 3216 /* if free space is not available, make more free space */ 3217 if (current_space->local_remaining<bnzi) { 3218 ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr); 3219 nspacedouble++; 3220 } 3221 /* copy data into free space, then initialize lnk */ 3222 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3223 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3224 3225 current_space->array += bnzi; 3226 current_space->local_used += bnzi; 3227 current_space->local_remaining -= bnzi; 3228 3229 bi[i+1] = bi[i] + bnzi; 3230 } 3231 3232 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3233 3234 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3235 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3236 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3237 3238 /* create symbolic parallel matrix B_mpi */ 3239 /*---------------------------------------*/ 3240 if
(n==PETSC_DECIDE) { 3241 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr); 3242 } else { 3243 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr); 3244 } 3245 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3246 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3247 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3248 3249 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3250 B_mpi->assembled = PETSC_FALSE; 3251 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3252 merge->bi = bi; 3253 merge->bj = bj; 3254 merge->buf_ri = buf_ri; 3255 merge->buf_rj = buf_rj; 3256 merge->coi = PETSC_NULL; 3257 merge->coj = PETSC_NULL; 3258 merge->owners_co = PETSC_NULL; 3259 3260 /* attach the supporting struct to B_mpi for reuse */ 3261 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3262 ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr); 3263 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3264 *mpimat = B_mpi; 3265 3266 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 3267 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3268 PetscFunctionReturn(0); 3269 } 3270 3271 static PetscEvent logkey_seqstompi = 0; 3272 #undef __FUNCT__ 3273 #define __FUNCT__ "MatMerge_SeqsToMPI" 3274 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3275 { 3276 PetscErrorCode ierr; 3277 3278 PetscFunctionBegin; 3279 if (!logkey_seqstompi) { 3280 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3281 } 3282 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3283 if (scall == MAT_INITIAL_MATRIX){ 3284 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3285 } 3286 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3287 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3288 PetscFunctionReturn(0); 3289 } 3290 static PetscEvent logkey_getlocalmat = 0; 3291 #undef __FUNCT__ 3292 #define __FUNCT__ "MatGetLocalMat" 3293 /*@C 3294 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3295 3296 Not Collective 3297 3298 Input Parameters: 3299 + A - the matrix 3300 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3301 3302 Output Parameter: 3303 . 
A_loc - the local sequential matrix generated 3304 3305 Level: developer 3306 3307 @*/ 3308 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3309 { 3310 PetscErrorCode ierr; 3311 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3312 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3313 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3314 PetscScalar *aa=a->a,*ba=b->a,*ca; 3315 PetscInt am=A->m,i,j,k,cstart=mpimat->cstart; 3316 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3317 3318 PetscFunctionBegin; 3319 if (!logkey_getlocalmat) { 3320 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3321 } 3322 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3323 if (scall == MAT_INITIAL_MATRIX){ 3324 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 3325 ci[0] = 0; 3326 for (i=0; i<am; i++){ 3327 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3328 } 3329 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3330 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 3331 k = 0; 3332 for (i=0; i<am; i++) { 3333 ncols_o = bi[i+1] - bi[i]; 3334 ncols_d = ai[i+1] - ai[i]; 3335 /* off-diagonal portion of A */ 3336 for (jo=0; jo<ncols_o; jo++) { 3337 col = cmap[*bj]; 3338 if (col >= cstart) break; 3339 cj[k] = col; bj++; 3340 ca[k++] = *ba++; 3341 } 3342 /* diagonal portion of A */ 3343 for (j=0; j<ncols_d; j++) { 3344 cj[k] = cstart + *aj++; 3345 ca[k++] = *aa++; 3346 } 3347 /* off-diagonal portion of A */ 3348 for (j=jo; j<ncols_o; j++) { 3349 cj[k] = cmap[*bj++]; 3350 ca[k++] = *ba++; 3351 } 3352 } 3353 /* put together the new matrix */ 3354 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 3355 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3356 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3357 mat = (Mat_SeqAIJ*)(*A_loc)->data; 3358 mat->freedata = PETSC_TRUE; 3359 mat->nonew = 0; 3360 } else if (scall == MAT_REUSE_MATRIX){ 3361 mat=(Mat_SeqAIJ*)(*A_loc)->data; 3362 ci = mat->i; cj = mat->j; ca = mat->a; 3363 for (i=0; i<am; i++) { 3364 /* off-diagonal portion of A */ 3365 ncols_o = bi[i+1] - bi[i]; 3366 for (jo=0; jo<ncols_o; jo++) { 3367 col = cmap[*bj]; 3368 if (col >= cstart) break; 3369 *ca++ = *ba++; bj++; 3370 } 3371 /* diagonal portion of A */ 3372 ncols_d = ai[i+1] - ai[i]; 3373 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 3374 /* off-diagonal portion of A */ 3375 for (j=jo; j<ncols_o; j++) { 3376 *ca++ = *ba++; bj++; 3377 } 3378 } 3379 } else { 3380 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3381 } 3382 3383 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3384 PetscFunctionReturn(0); 3385 } 3386 3387 static PetscEvent logkey_getlocalmatcondensed = 0; 3388 #undef __FUNCT__ 3389 #define __FUNCT__ "MatGetLocalMatCondensed" 3390 /*@C 3391 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 3392 3393 Not Collective 3394 3395 Input Parameters: 3396 + A - the matrix 3397 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3398 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 3399 3400 Output Parameter: 3401 . 
A_loc - the local sequential matrix generated 3402 3403 Level: developer 3404 3405 @*/ 3406 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 3407 { 3408 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3409 PetscErrorCode ierr; 3410 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 3411 IS isrowa,iscola; 3412 Mat *aloc; 3413 3414 PetscFunctionBegin; 3415 if (!logkey_getlocalmatcondensed) { 3416 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE); 3417 } 3418 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3419 if (!row){ 3420 start = a->rstart; end = a->rend; 3421 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 3422 } else { 3423 isrowa = *row; 3424 } 3425 if (!col){ 3426 start = a->cstart; 3427 cmap = a->garray; 3428 nzA = a->A->n; 3429 nzB = a->B->n; 3430 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3431 ncols = 0; 3432 for (i=0; i<nzB; i++) { 3433 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3434 else break; 3435 } 3436 imark = i; 3437 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 3438 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 3439 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 3440 ierr = PetscFree(idx);CHKERRQ(ierr); 3441 } else { 3442 iscola = *col; 3443 } 3444 if (scall != MAT_INITIAL_MATRIX){ 3445 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 3446 aloc[0] = *A_loc; 3447 } 3448 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 3449 *A_loc = aloc[0]; 3450 ierr = PetscFree(aloc);CHKERRQ(ierr); 3451 if (!row){ 3452 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 3453 } 3454 if (!col){ 3455 ierr = ISDestroy(iscola);CHKERRQ(ierr); 3456 } 3457 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3458 PetscFunctionReturn(0); 3459 } 3460 3461 static PetscEvent logkey_GetBrowsOfAcols = 0; 3462 #undef __FUNCT__ 3463 #define __FUNCT__ "MatGetBrowsOfAcols" 3464 /*@C 3465 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A 3466 3467 Collective on Mat 3468 3469 Input Parameters: 3470 + A,B - the matrices in mpiaij format 3471 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3472 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 3473 3474 Output Parameter: 3475 + rowb, colb - index sets of rows and columns of B to extract 3476 .
brstart - row index of B_seq at which B's own local rows begin (the next B->m rows of B_seq are taken from B's local rows) 3477 - B_seq - the sequential matrix generated 3478 3479 Level: developer 3480 3481 @*/ 3482 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 3483 { 3484 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3485 PetscErrorCode ierr; 3486 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 3487 IS isrowb,iscolb; 3488 Mat *bseq; 3489 3490 PetscFunctionBegin; 3491 if (a->cstart != b->rstart || a->cend != b->rend){ 3492 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend); 3493 } 3494 if (!logkey_GetBrowsOfAcols) { 3495 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 3496 } 3497 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3498 3499 if (scall == MAT_INITIAL_MATRIX){ 3500 start = a->cstart; 3501 cmap = a->garray; 3502 nzA = a->A->n; 3503 nzB = a->B->n; 3504 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3505 ncols = 0; 3506 for (i=0; i<nzB; i++) { /* row < local row index */ 3507 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3508 else break; 3509 } 3510 imark = i; 3511 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 3512 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 3513 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 3514 ierr = PetscFree(idx);CHKERRQ(ierr); 3515 *brstart = imark; 3516 ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr); 3517 } else { 3518 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 3519 isrowb = *rowb; iscolb = *colb; 3520 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 3521 bseq[0] = *B_seq; 3522 } 3523 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 3524 *B_seq = bseq[0]; 3525 ierr = PetscFree(bseq);CHKERRQ(ierr); 3526 if (!rowb){ 3527 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 3528 } else { 3529 *rowb = isrowb; 3530 } 3531 if (!colb){ 3532 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 3533 } else { 3534 *colb = iscolb; 3535 } 3536 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3537 PetscFunctionReturn(0); 3538 } 3539 3540 static PetscEvent logkey_GetBrowsOfAocols = 0; 3541 #undef __FUNCT__ 3542 #define __FUNCT__ "MatGetBrowsOfAoCols" 3543 /*@C 3544 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns 3545 of the OFF-DIAGONAL portion of the local part of A 3546 3547 Collective on Mat 3548 3549 Input Parameters: 3550 + A,B - the matrices in mpiaij format 3551 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3552 .
startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 3553 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 3554 3555 Output Parameter: 3556 . B_oth - the sequential matrix generated 3557 3558 Level: developer 3559 3560 @*/ 3561 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 3562 { 3563 VecScatter_MPI_General *gen_to,*gen_from; 3564 PetscErrorCode ierr; 3565 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3566 Mat_SeqAIJ *b_oth; 3567 VecScatter ctx=a->Mvctx; 3568 MPI_Comm comm=ctx->comm; 3569 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank; 3570 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj; 3571 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 3572 PetscInt i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 3573 MPI_Request *rwaits,*swaits; 3574 MPI_Status *sstatus,rstatus; 3575 PetscInt *cols; 3576 PetscScalar *vals; 3577 PetscMPIInt j; 3578 3579 PetscFunctionBegin; 3580 if (a->cstart != b->rstart || a->cend != b->rend){ 3581 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend); 3582 } 3583 if (!logkey_GetBrowsOfAocols) { 3584 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE); 3585 } 3586 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3587 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3588 3589 gen_to = (VecScatter_MPI_General*)ctx->todata; 3590 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 3591 rvalues = gen_from->values; /* holds the lengths of the rows to be received */ 3592 svalues = gen_to->values; /* holds the lengths of the rows to be sent */ 3593 nrecvs = gen_from->n; 3594 nsends = gen_to->n; 3595 rwaits = gen_from->requests; 3596 swaits = gen_to->requests; 3597 srow = gen_to->indices; /* local row index to be sent */ 3598 rstarts = gen_from->starts; 3599 sstarts = gen_to->starts; 3600 rprocs = gen_from->procs; 3601 sprocs = gen_to->procs; 3602 sstatus = gen_to->sstatus; 3603 3604 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 3605 if (scall == MAT_INITIAL_MATRIX){ 3606 /* i-array */ 3607 /*---------*/ 3608 /* post receives */ 3609 for (i=0; i<nrecvs; i++){ 3610 rowlen = (PetscInt*)rvalues + rstarts[i]; 3611 nrows = rstarts[i+1]-rstarts[i]; 3612 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3613 } 3614 3615 /* pack the outgoing message */ 3616 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 3617 rstartsj = sstartsj + nsends +1; 3618 sstartsj[0] = 0; rstartsj[0] = 0; 3619 len = 0; /* total length of j or a array to be sent */ 3620 k = 0; 3621 for (i=0; i<nsends; i++){ 3622 rowlen = (PetscInt*)svalues + sstarts[i]; 3623 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3624 for (j=0; j<nrows; j++) { 3625 row = srow[k] + b->rowners[rank]; /* global row idx */ 3626 ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 3627 len += rowlen[j]; 3628 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 3629 k++; 3630 } 3631 ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3632 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 3633 } 3634 /* recvs and sends of i-array are completed */ 3635 i = nrecvs; 3636 while (i--) { 3637 ierr =
MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3638 } 3639 if (nsends) { 3640 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr); 3641 } 3642 /* allocate buffers for sending j and a arrays */ 3643 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 3644 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 3645 3646 /* create i-array of B_oth */ 3647 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 3648 b_othi[0] = 0; 3649 len = 0; /* total length of j or a array to be received */ 3650 k = 0; 3651 for (i=0; i<nrecvs; i++){ 3652 rowlen = (PetscInt*)rvalues + rstarts[i]; 3653 nrows = rstarts[i+1]-rstarts[i]; 3654 for (j=0; j<nrows; j++) { 3655 b_othi[k+1] = b_othi[k] + rowlen[j]; 3656 len += rowlen[j]; k++; 3657 } 3658 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 3659 } 3660 3661 /* allocate space for j and a arrays of B_oth */ 3662 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 3663 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr); 3664 3665 /* j-array */ 3666 /*---------*/ 3667 /* post receives of j-array */ 3668 for (i=0; i<nrecvs; i++){ 3669 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3670 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3671 } 3672 k = 0; 3673 for (i=0; i<nsends; i++){ 3674 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3675 bufJ = bufj+sstartsj[i]; 3676 for (j=0; j<nrows; j++) { 3677 row = srow[k++] + b->rowners[rank]; /* global row idx */ 3678 ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3679 for (l=0; l<ncols; l++){ 3680 *bufJ++ = cols[l]; 3681 } 3682 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3683 } 3684 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3685 } 3686 3687 /* recvs and sends of j-array are completed */ 3688 i = nrecvs; 3689 while (i--) { 3690 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3691 } 3692 if (nsends) { 3693 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr); 3694 } 3695 } else if (scall == MAT_REUSE_MATRIX){ 3696 sstartsj = *startsj; 3697 rstartsj = sstartsj + nsends +1; 3698 bufa = *bufa_ptr; 3699 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 3700 b_otha = b_oth->a; 3701 } else { 3702 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3703 } 3704 3705 /* a-array */ 3706 /*---------*/ 3707 /* post receives of a-array */ 3708 for (i=0; i<nrecvs; i++){ 3709 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3710 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3711 } 3712 k = 0; 3713 for (i=0; i<nsends; i++){ 3714 nrows = sstarts[i+1]-sstarts[i]; 3715 bufA = bufa+sstartsj[i]; 3716 for (j=0; j<nrows; j++) { 3717 row = srow[k++] + b->rowners[rank]; /* global row idx */ 3718 ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3719 for (l=0; l<ncols; l++){ 3720 *bufA++ = vals[l]; 3721 } 3722 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3723 3724 } 3725 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3726 } 3727 /* recvs and sends of a-array are completed */ 3728 i = nrecvs; 3729 while (i--) { 3730 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3731 } 3732 if (nsends) {
3733 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr); 3734 } 3735 3736 if (scall == MAT_INITIAL_MATRIX){ 3737 /* put together the new matrix */ 3738 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 3739 3740 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3741 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3742 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 3743 b_oth->freedata = PETSC_TRUE; 3744 b_oth->nonew = 0; 3745 3746 ierr = PetscFree(bufj);CHKERRQ(ierr); 3747 if (!startsj || !bufa_ptr){ 3748 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 3749 ierr = PetscFree(bufa);CHKERRQ(ierr); /* caller did not ask to keep the value send buffer */ 3750 } else { 3751 *startsj = sstartsj; 3752 *bufa_ptr = bufa; 3753 } 3754 } 3755 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3756 3757 PetscFunctionReturn(0); 3758 } 3759 3760 /*MC 3761 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 3762 3763 Options Database Keys: 3764 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 3765 3766 Level: beginner 3767 3768 .seealso: MatCreateMPIAIJ 3769 M*/ 3770 3771 EXTERN_C_BEGIN 3772 #undef __FUNCT__ 3773 #define __FUNCT__ "MatCreate_MPIAIJ" 3774 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 3775 { 3776 Mat_MPIAIJ *b; 3777 PetscErrorCode ierr; 3778 PetscInt i; 3779 PetscMPIInt size; 3780 3781 PetscFunctionBegin; 3782 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 3783 3784 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 3785 B->data = (void*)b; 3786 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3787 B->factor = 0; 3788 B->bs = 1; 3789 B->assembled = PETSC_FALSE; 3790 B->mapping = 0; 3791 3792 B->insertmode = NOT_SET_VALUES; 3793 b->size = size; 3794 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 3795 3796 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 3797 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 3798 3799 /* the information in the maps duplicates the information computed below, eventually 3800 we should remove the duplicate information that is not contained in the maps */ 3801 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 3802 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 3803 3804 /* build local table of row and column ownerships */ 3805 ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 3806 ierr = PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 3807 b->cowners = b->rowners + b->size + 2; 3808 ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3809 b->rowners[0] = 0; 3810 for (i=2; i<=b->size; i++) { 3811 b->rowners[i] += b->rowners[i-1]; 3812 } 3813 b->rstart = b->rowners[b->rank]; 3814 b->rend = b->rowners[b->rank+1]; 3815 ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3816 b->cowners[0] = 0; 3817 for (i=2; i<=b->size; i++) { 3818 b->cowners[i] += b->cowners[i-1]; 3819 } 3820 b->cstart = b->cowners[b->rank]; 3821 b->cend = b->cowners[b->rank+1]; 3822 3823 /* build cache for off array entries formed */ 3824 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 3825 b->donotstash = PETSC_FALSE; 3826 b->colmap = 0; 3827 b->garray = 0; 3828 b->roworiented = PETSC_TRUE; 3829 3830 /* stuff used for matrix vector multiply */ 3831 b->lvec =
PETSC_NULL; 3832 b->Mvctx = PETSC_NULL; 3833 3834 /* stuff for MatGetRow() */ 3835 b->rowindices = 0; 3836 b->rowvalues = 0; 3837 b->getrowactive = PETSC_FALSE; 3838 3839 /* Explicitly create 2 MATSEQAIJ matrices. */ 3840 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 3841 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3842 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3843 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 3844 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3845 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3846 3847 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3848 "MatStoreValues_MPIAIJ", 3849 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 3850 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3851 "MatRetrieveValues_MPIAIJ", 3852 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 3853 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3854 "MatGetDiagonalBlock_MPIAIJ", 3855 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 3856 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3857 "MatIsTranspose_MPIAIJ", 3858 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 3859 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 3860 "MatMPIAIJSetPreallocation_MPIAIJ", 3861 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 3862 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 3863 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 3864 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 3865 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3866 "MatDiagonalScaleLocal_MPIAIJ", 3867 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 3868 PetscFunctionReturn(0); 3869 } 3870 EXTERN_C_END 3871
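A small usage sketch of the merge interface defined above: it shows the MAT_INITIAL_MATRIX / MAT_REUSE_MATRIX calling pattern of MatMerge_SeqsToMPI(). The sequential matrix seqA and the PETSC_DECIDE sizes are illustrative placeholders (creation and filling of seqA are omitted); this block is not compiled with the file.

#if 0
Mat            seqA,C;   /* seqA: this process's MATSEQAIJ piece (placeholder); C: the merged MATMPIAIJ matrix */
PetscErrorCode ierr;

/* first call: MAT_INITIAL_MATRIX runs MatMerge_SeqsToMPISymbolic() to build the parallel
   structure (attached to C in the "MatMergeSeqsToMPI" container) and then
   MatMerge_SeqsToMPINumeric() to fill the values */
ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);

/* later calls: when only the numerical values of seqA have changed (same nonzero pattern),
   MAT_REUSE_MATRIX skips the symbolic phase and redoes just the numeric merge */
ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);

/* per the manual page of MatMerge_SeqsToMPINumeric() above, seqA is kept in the container
   attached to C and is cleaned up when C is destroyed; query it with PetscObjectQuery() */
ierr = MatDestroy(C);CHKERRQ(ierr);
#endif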