#define PETSCMAT_DLL

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/inline/spops.h"

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array) but it is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (lastcol1 > col) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    if (nrow1 >= rmax1) { \
      /* there is no extra room in row, therefore enlarge */ \
      PetscInt    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
      \
      if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
      \
      /* malloc new storage space */ \
      ierr = PetscMalloc3(new_nz,PetscScalar,&new_a,new_nz,PetscInt,&new_j,am+1,PetscInt,&new_i);CHKERRQ(ierr);\
      \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow1)*sizeof(PetscInt));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow1); \
      ierr = PetscMemcpy(new_j+ai[row]+nrow1+CHUNKSIZE,aj+ai[row]+nrow1,len*sizeof(PetscInt));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow1)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+nrow1+CHUNKSIZE,aa+ai[row]+nrow1,len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
      \
      if (!a->singlemalloc) {\
        ierr = PetscFree(a->a);CHKERRQ(ierr);\
        ierr = PetscFree(a->i);CHKERRQ(ierr);\
        ierr = PetscFree(a->j);CHKERRQ(ierr);\
      } else {\
        ierr = PetscFree3(a->a,a->i,a->j);CHKERRQ(ierr);\
      }\
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
      \
      rp1   = aj + ai[row]; ap1 = aa + ai[row]; \
      rmax1 = aimax[row] = aimax[row] + CHUNKSIZE; \
      ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar)));CHKERRQ(ierr); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow1++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (lastcol2 > col) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    if (nrow2 >= rmax2) { \
      /* there is no extra room in row, therefore enlarge */ \
      PetscInt    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
      \
      if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
      \
      /* malloc new storage space */ \
      ierr = PetscMalloc3(new_nz,PetscScalar,&new_a,new_nz,PetscInt,&new_j,bm+1,PetscInt,&new_i);CHKERRQ(ierr);\
      \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
      for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow2)*sizeof(PetscInt));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - bi[row] - nrow2); \
      ierr = PetscMemcpy(new_j+bi[row]+nrow2+CHUNKSIZE,bj+bi[row]+nrow2,len*sizeof(PetscInt));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow2)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bi[row]+nrow2+CHUNKSIZE,ba+bi[row]+nrow2,len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
      \
      if (!b->singlemalloc) {\
        ierr = PetscFree(b->a);CHKERRQ(ierr);\
        ierr = PetscFree(b->i);CHKERRQ(ierr);\
        ierr = PetscFree(b->j);CHKERRQ(ierr);\
      } else {\
        ierr = PetscFree3(b->a,b->i,b->j);CHKERRQ(ierr);\
      }\
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = PETSC_TRUE; \
      \
      rp2   = bj + bi[row]; ap2 = ba + bi[row]; \
      rmax2 = bimax[row] = bimax[row] + CHUNKSIZE; \
      ierr = PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar)));CHKERRQ(ierr); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow2++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
  PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
  PetscTruth     roworiented = aij->roworiented;

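  /*
     Descriptive note: each (row,col) entry handled below goes to one of three places:
       - the "diagonal" block aij->A when both the row and the column are owned locally,
       - the "off-diagonal" block aij->B when the row is local but the column is not,
       - the stash (communicated later by MatAssemblyBegin/End) when the row belongs
         to another process and donotstash is not set.
     The two macros above perform the actual insertion into the sequential AIJ blocks.
  */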
  /* Some Variables required in the macro */
  Mat         A = aij->A;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)A->data;
  PetscInt    *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar *aa = a->a;
  PetscTruth  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat         B = aij->B;
  Mat_SeqAIJ  *b = (Mat_SeqAIJ*)B->data;
  PetscInt    *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
  PetscScalar *ba = b->a;

  PetscInt    *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt    nonew = a->nonew;
  PetscScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
  PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscLogInfo((aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

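/*
   A minimal usage sketch (illustrative only, not part of this routine; creation and
   preallocation of the matrix are omitted): these assembly routines are normally
   reached through the generic interface after all values have been set, e.g.

       Mat         A;                       (created and preallocated elsewhere)
       PetscInt    row = 0, col = 0;        (global indices; placeholders here)
       PetscScalar v   = 1.0;
       ierr = MatSetValues(A,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   Entries stashed for other processes in MatSetValues_MPIAIJ() are communicated
   between the Begin and End calls.
*/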
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ *)aij->A->data,*b = (Mat_SeqAIJ *)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col,other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  /* reaccess b because aij->B was changed in MatSetValues() or DisAssemble() */
  b = (Mat_SeqAIJ *)aij->B->data;

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
  b->compressedrow.use = PETSC_TRUE;
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }

  /* used by MatAXPY() */
  a->xtoy = 0; b->xtoy = 0;
  a->XtoY = 0; b->XtoY = 0;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
  PetscInt       i,N,*rows,*owners = l->rowners;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(A,istmp);CHKERRQ(ierr);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.
     Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added into yy until the next line */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

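/*
   Note on the multiply routines above: each one overlaps communication with
   computation in the same way: start the scatter of the needed off-process entries
   of xx into a->lvec, apply the local diagonal block while the messages are in
   flight, complete the scatter, then apply the off-diagonal block, so that, for
   example, MatMult computes yy = A_diag*xx_local + B_off*lvec.
*/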
/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->M;
    header[2] = mat->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen = mat->m;
    ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* 0th processor needs space as large as the largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    PetscInt    M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    if (!rank) {
      ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

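/*
   The relaxation below is processor-local (block Jacobi with SOR applied to each
   diagonal block): before every local sweep the current ghost values of xx are
   scattered into mat->lvec and the right-hand side is updated as bb1 = bb - B*lvec,
   then the sequential relax routine is applied to the diagonal block only.
   A true parallel SOR is not supported (see the final else branch).
*/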
#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1;
  PetscScalar    mone = -1.0;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  info->columns_local     = (double)matin->N;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    ierr = PetscLogInfo((A,"MatSetOption_MPIAIJ:Option ignored\n"));CHKERRQ(ierr);
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = aij->cstart;
  PetscReal      sum = 0.0;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->m; j++) {
        v = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat            B;
  PetscScalar    *array;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  }

  ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*)a->A->data;
  m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*)a->B->data;
  m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
  ct = cols;
  for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  ierr = PetscFree(ct);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPrintHelp_MPIAIJ"
PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetBlockSize_MPIAIJ"
PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
  ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIAIJ"
PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    x = (Mat_SeqAIJ *)xx->A->data;
    y = (Mat_SeqAIJ *)yy->A->data;
    bnz = (PetscBLASInt)x->nz;
    BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    bnz = (PetscBLASInt)x->nz;
    BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
  } else if (str == SUBSET_NONZERO_PATTERN) {
    ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);

    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    if (y->xtoy && y->XtoY != xx->B) {
      ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
      ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
    }
    if (!y->xtoy) { /* get xtoy */
      ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
      y->XtoY = xx->B;
    }
    for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
  } else {
    ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
/* 4*/ MatMultAdd_MPIAIJ,
       MatMultTranspose_MPIAIJ,
       MatMultTransposeAdd_MPIAIJ,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
/*15*/ MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
/*20*/ MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
/*25*/ MatZeroRows_MPIAIJ,
       0,
       0,
       0,
       0,
/*30*/ MatSetUpPreallocation_MPIAIJ,
       0,
       0,
       0,
       0,
/*35*/ MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
/*40*/ MatAXPY_MPIAIJ,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
/*45*/ MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       0,
/*50*/ MatSetBlockSize_MPIAIJ,
0, 1696 0, 1697 0, 1698 0, 1699 /*55*/ MatFDColoringCreate_MPIAIJ, 1700 0, 1701 MatSetUnfactored_MPIAIJ, 1702 0, 1703 0, 1704 /*60*/ MatGetSubMatrix_MPIAIJ, 1705 MatDestroy_MPIAIJ, 1706 MatView_MPIAIJ, 1707 MatGetPetscMaps_Petsc, 1708 0, 1709 /*65*/ 0, 1710 0, 1711 0, 1712 0, 1713 0, 1714 /*70*/ 0, 1715 0, 1716 MatSetColoring_MPIAIJ, 1717 #if defined(PETSC_HAVE_ADIC) 1718 MatSetValuesAdic_MPIAIJ, 1719 #else 1720 0, 1721 #endif 1722 MatSetValuesAdifor_MPIAIJ, 1723 /*75*/ 0, 1724 0, 1725 0, 1726 0, 1727 0, 1728 /*80*/ 0, 1729 0, 1730 0, 1731 0, 1732 /*84*/ MatLoad_MPIAIJ, 1733 0, 1734 0, 1735 0, 1736 0, 1737 0, 1738 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1739 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1740 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1741 MatPtAP_Basic, 1742 MatPtAPSymbolic_MPIAIJ, 1743 /*95*/ MatPtAPNumeric_MPIAIJ, 1744 0, 1745 0, 1746 0, 1747 0, 1748 /*100*/0, 1749 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1750 MatPtAPNumeric_MPIAIJ_MPIAIJ, 1751 }; 1752 1753 /* ----------------------------------------------------------------------------------------*/ 1754 1755 EXTERN_C_BEGIN 1756 #undef __FUNCT__ 1757 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1758 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 1759 { 1760 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1761 PetscErrorCode ierr; 1762 1763 PetscFunctionBegin; 1764 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1765 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1766 PetscFunctionReturn(0); 1767 } 1768 EXTERN_C_END 1769 1770 EXTERN_C_BEGIN 1771 #undef __FUNCT__ 1772 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1773 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 1774 { 1775 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1776 PetscErrorCode ierr; 1777 1778 PetscFunctionBegin; 1779 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1780 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1781 PetscFunctionReturn(0); 1782 } 1783 EXTERN_C_END 1784 1785 #include "petscpc.h" 1786 EXTERN_C_BEGIN 1787 #undef __FUNCT__ 1788 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1789 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1790 { 1791 Mat_MPIAIJ *b; 1792 PetscErrorCode ierr; 1793 PetscInt i; 1794 1795 PetscFunctionBegin; 1796 B->preallocated = PETSC_TRUE; 1797 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1798 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1799 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1800 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1801 if (d_nnz) { 1802 for (i=0; i<B->m; i++) { 1803 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1804 } 1805 } 1806 if (o_nnz) { 1807 for (i=0; i<B->m; i++) { 1808 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1809 } 1810 } 1811 b = (Mat_MPIAIJ*)B->data; 1812 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1813 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1814 1815 PetscFunctionReturn(0); 1816 } 1817 EXTERN_C_END 1818 1819 #undef __FUNCT__ 1820 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1821 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1822 { 1823 Mat mat; 1824 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1825 PetscErrorCode ierr; 1826 1827 
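  /* Duplicate an MPIAIJ matrix: create a new matrix with the same layout, copy the
     parallel bookkeeping (row/column ownership, colmap, garray, the local work vector
     and scatter context), and duplicate the diagonal (A) and off-diagonal (B)
     sequential blocks, copying their numerical values only as requested by cpvalues */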
PetscFunctionBegin; 1828 *newmat = 0; 1829 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1830 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1831 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1832 a = (Mat_MPIAIJ*)mat->data; 1833 1834 mat->factor = matin->factor; 1835 mat->bs = matin->bs; 1836 mat->assembled = PETSC_TRUE; 1837 mat->insertmode = NOT_SET_VALUES; 1838 mat->preallocated = PETSC_TRUE; 1839 1840 a->rstart = oldmat->rstart; 1841 a->rend = oldmat->rend; 1842 a->cstart = oldmat->cstart; 1843 a->cend = oldmat->cend; 1844 a->size = oldmat->size; 1845 a->rank = oldmat->rank; 1846 a->donotstash = oldmat->donotstash; 1847 a->roworiented = oldmat->roworiented; 1848 a->rowindices = 0; 1849 a->rowvalues = 0; 1850 a->getrowactive = PETSC_FALSE; 1851 1852 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 1853 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1854 if (oldmat->colmap) { 1855 #if defined (PETSC_USE_CTABLE) 1856 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1857 #else 1858 ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1859 ierr = PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1860 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1861 #endif 1862 } else a->colmap = 0; 1863 if (oldmat->garray) { 1864 PetscInt len; 1865 len = oldmat->B->n; 1866 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1867 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 1868 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 1869 } else a->garray = 0; 1870 1871 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1872 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 1873 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1874 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 1875 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1876 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1877 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1878 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1879 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1880 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 1881 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1882 *newmat = mat; 1883 PetscFunctionReturn(0); 1884 } 1885 1886 #include "petscsys.h" 1887 1888 #undef __FUNCT__ 1889 #define __FUNCT__ "MatLoad_MPIAIJ" 1890 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 1891 { 1892 Mat A; 1893 PetscScalar *vals,*svals; 1894 MPI_Comm comm = ((PetscObject)viewer)->comm; 1895 MPI_Status status; 1896 PetscErrorCode ierr; 1897 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 1898 PetscInt i,nz,j,rstart,rend,mmax; 1899 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 1900 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1901 PetscInt cend,cstart,n,*rowners; 1902 int fd; 1903 1904 PetscFunctionBegin; 1905 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1906 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1907 if (!rank) { 1908 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1909 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1910 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1911 } 1912 1913 ierr = 
MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1914 M = header[1]; N = header[2]; 1915 /* determine ownership of all rows */ 1916 m = M/size + ((M % size) > rank); 1917 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1918 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1919 1920 /* First process needs enough room for process with most rows */ 1921 if (!rank) { 1922 mmax = rowners[1]; 1923 for (i=2; i<size; i++) { 1924 mmax = PetscMax(mmax,rowners[i]); 1925 } 1926 } else mmax = m; 1927 1928 rowners[0] = 0; 1929 for (i=2; i<=size; i++) { 1930 mmax = PetscMax(mmax,rowners[i]); 1931 rowners[i] += rowners[i-1]; 1932 } 1933 rstart = rowners[rank]; 1934 rend = rowners[rank+1]; 1935 1936 /* distribute row lengths to all processors */ 1937 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 1938 if (!rank) { 1939 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 1940 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1941 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 1942 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1943 for (j=0; j<m; j++) { 1944 procsnz[0] += ourlens[j]; 1945 } 1946 for (i=1; i<size; i++) { 1947 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 1948 /* calculate the number of nonzeros on each processor */ 1949 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 1950 procsnz[i] += rowlengths[j]; 1951 } 1952 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1953 } 1954 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1955 } else { 1956 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1957 } 1958 1959 if (!rank) { 1960 /* determine max buffer needed and allocate it */ 1961 maxnz = 0; 1962 for (i=0; i<size; i++) { 1963 maxnz = PetscMax(maxnz,procsnz[i]); 1964 } 1965 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1966 1967 /* read in my part of the matrix column indices */ 1968 nz = procsnz[0]; 1969 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1970 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1971 1972 /* read in every one elses and ship off */ 1973 for (i=1; i<size; i++) { 1974 nz = procsnz[i]; 1975 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1976 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1977 } 1978 ierr = PetscFree(cols);CHKERRQ(ierr); 1979 } else { 1980 /* determine buffer space needed for message */ 1981 nz = 0; 1982 for (i=0; i<m; i++) { 1983 nz += ourlens[i]; 1984 } 1985 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1986 1987 /* receive message of column indices*/ 1988 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1989 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1990 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1991 } 1992 1993 /* determine column ownership if matrix is not square */ 1994 if (N != M) { 1995 n = N/size + ((N % size) > rank); 1996 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1997 cstart = cend - n; 1998 } else { 1999 cstart = rstart; 2000 cend = rend; 2001 n = cend - cstart; 2002 } 2003 2004 /* loop over local rows, determining number of off diagonal entries */ 2005 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2006 jj = 0; 2007 for (i=0; i<m; i++) { 2008 for (j=0; j<ourlens[i]; j++) { 2009 if (mycols[jj] < cstart 
|| mycols[jj] >= cend) offlens[i]++; 2010 jj++; 2011 } 2012 } 2013 2014 /* create our matrix */ 2015 for (i=0; i<m; i++) { 2016 ourlens[i] -= offlens[i]; 2017 } 2018 ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr); 2019 ierr = MatSetType(A,type);CHKERRQ(ierr); 2020 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2021 2022 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2023 for (i=0; i<m; i++) { 2024 ourlens[i] += offlens[i]; 2025 } 2026 2027 if (!rank) { 2028 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2029 2030 /* read in my part of the matrix numerical values */ 2031 nz = procsnz[0]; 2032 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2033 2034 /* insert into matrix */ 2035 jj = rstart; 2036 smycols = mycols; 2037 svals = vals; 2038 for (i=0; i<m; i++) { 2039 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2040 smycols += ourlens[i]; 2041 svals += ourlens[i]; 2042 jj++; 2043 } 2044 2045 /* read in other processors and ship out */ 2046 for (i=1; i<size; i++) { 2047 nz = procsnz[i]; 2048 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2049 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2050 } 2051 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2052 } else { 2053 /* receive numeric values */ 2054 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2055 2056 /* receive message of values*/ 2057 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2058 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2059 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2060 2061 /* insert into matrix */ 2062 jj = rstart; 2063 smycols = mycols; 2064 svals = vals; 2065 for (i=0; i<m; i++) { 2066 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2067 smycols += ourlens[i]; 2068 svals += ourlens[i]; 2069 jj++; 2070 } 2071 } 2072 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2073 ierr = PetscFree(vals);CHKERRQ(ierr); 2074 ierr = PetscFree(mycols);CHKERRQ(ierr); 2075 ierr = PetscFree(rowners);CHKERRQ(ierr); 2076 2077 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2078 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2079 *newmat = A; 2080 PetscFunctionReturn(0); 2081 } 2082 2083 #undef __FUNCT__ 2084 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2085 /* 2086 Not great since it makes two copies of the submatrix, first an SeqAIJ 2087 in local and then by concatenating the local matrices the end result. 
2088 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2089 */ 2090 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2091 { 2092 PetscErrorCode ierr; 2093 PetscMPIInt rank,size; 2094 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2095 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2096 Mat *local,M,Mreuse; 2097 PetscScalar *vwork,*aa; 2098 MPI_Comm comm = mat->comm; 2099 Mat_SeqAIJ *aij; 2100 2101 2102 PetscFunctionBegin; 2103 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2104 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2105 2106 if (call == MAT_REUSE_MATRIX) { 2107 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2108 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2109 local = &Mreuse; 2110 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2111 } else { 2112 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2113 Mreuse = *local; 2114 ierr = PetscFree(local);CHKERRQ(ierr); 2115 } 2116 2117 /* 2118 m - number of local rows 2119 n - number of columns (same on all processors) 2120 rstart - first row in new global matrix generated 2121 */ 2122 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2123 if (call == MAT_INITIAL_MATRIX) { 2124 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2125 ii = aij->i; 2126 jj = aij->j; 2127 2128 /* 2129 Determine the number of non-zeros in the diagonal and off-diagonal 2130 portions of the matrix in order to do correct preallocation 2131 */ 2132 2133 /* first get start and end of "diagonal" columns */ 2134 if (csize == PETSC_DECIDE) { 2135 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2136 if (mglobal == n) { /* square matrix */ 2137 nlocal = m; 2138 } else { 2139 nlocal = n/size + ((n % size) > rank); 2140 } 2141 } else { 2142 nlocal = csize; 2143 } 2144 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2145 rstart = rend - nlocal; 2146 if (rank == size - 1 && rend != n) { 2147 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2148 } 2149 2150 /* next, compute all the lengths */ 2151 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2152 olens = dlens + m; 2153 for (i=0; i<m; i++) { 2154 jend = ii[i+1] - ii[i]; 2155 olen = 0; 2156 dlen = 0; 2157 for (j=0; j<jend; j++) { 2158 if (*jj < rstart || *jj >= rend) olen++; 2159 else dlen++; 2160 jj++; 2161 } 2162 olens[i] = olen; 2163 dlens[i] = dlen; 2164 } 2165 ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr); 2166 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2167 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2168 ierr = PetscFree(dlens);CHKERRQ(ierr); 2169 } else { 2170 PetscInt ml,nl; 2171 2172 M = *newmat; 2173 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2174 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2175 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2176 /* 2177 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2178 rather than the slower MatSetValues(). 
2179 */ 2180 M->was_assembled = PETSC_TRUE; 2181 M->assembled = PETSC_FALSE; 2182 } 2183 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2184 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2185 ii = aij->i; 2186 jj = aij->j; 2187 aa = aij->a; 2188 for (i=0; i<m; i++) { 2189 row = rstart + i; 2190 nz = ii[i+1] - ii[i]; 2191 cwork = jj; jj += nz; 2192 vwork = aa; aa += nz; 2193 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2194 } 2195 2196 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2197 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2198 *newmat = M; 2199 2200 /* save submatrix used in processor for next request */ 2201 if (call == MAT_INITIAL_MATRIX) { 2202 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2203 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2204 } 2205 2206 PetscFunctionReturn(0); 2207 } 2208 2209 EXTERN_C_BEGIN 2210 #undef __FUNCT__ 2211 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2212 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2213 { 2214 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2215 PetscInt m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d; 2216 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii; 2217 const PetscInt *JJ; 2218 PetscScalar *values; 2219 PetscErrorCode ierr; 2220 2221 PetscFunctionBegin; 2222 #if defined(PETSC_OPT_g) 2223 if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]); 2224 #endif 2225 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2226 o_nnz = d_nnz + m; 2227 2228 for (i=0; i<m; i++) { 2229 nnz = I[i+1]- I[i]; 2230 JJ = J + I[i]; 2231 nnz_max = PetscMax(nnz_max,nnz); 2232 #if defined(PETSC_OPT_g) 2233 if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 2234 #endif 2235 for (j=0; j<nnz; j++) { 2236 if (*JJ >= cstart) break; 2237 JJ++; 2238 } 2239 d = 0; 2240 for (; j<nnz; j++) { 2241 if (*JJ++ >= cend) break; 2242 d++; 2243 } 2244 d_nnz[i] = d; 2245 o_nnz[i] = nnz - d; 2246 } 2247 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2248 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2249 2250 if (v) values = (PetscScalar*)v; 2251 else { 2252 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2253 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2254 } 2255 2256 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2257 for (i=0; i<m; i++) { 2258 ii = i + rstart; 2259 nnz = I[i+1]- I[i]; 2260 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr); 2261 } 2262 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2263 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2264 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2265 2266 if (!v) { 2267 ierr = PetscFree(values);CHKERRQ(ierr); 2268 } 2269 PetscFunctionReturn(0); 2270 } 2271 EXTERN_C_END 2272 2273 #undef __FUNCT__ 2274 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2275 /*@C 2276 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2277 (the default parallel PETSc format). 2278 2279 Collective on MPI_Comm 2280 2281 Input Parameters: 2282 + A - the matrix 2283 . i - the indices into j for the start of each local row (starts with zero) 2284 . 
j - the column indices for each local row (starts with zero) these must be sorted for each row 2285 - v - optional values in the matrix 2286 2287 Level: developer 2288 2289 .keywords: matrix, aij, compressed row, sparse, parallel 2290 2291 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2292 @*/ 2293 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2294 { 2295 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2296 2297 PetscFunctionBegin; 2298 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2299 if (f) { 2300 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2301 } 2302 PetscFunctionReturn(0); 2303 } 2304 2305 #undef __FUNCT__ 2306 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2307 /*@C 2308 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2309 (the default parallel PETSc format). For good matrix assembly performance 2310 the user should preallocate the matrix storage by setting the parameters 2311 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2312 performance can be increased by more than a factor of 50. 2313 2314 Collective on MPI_Comm 2315 2316 Input Parameters: 2317 + A - the matrix 2318 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2319 (same value is used for all local rows) 2320 . d_nnz - array containing the number of nonzeros in the various rows of the 2321 DIAGONAL portion of the local submatrix (possibly different for each row) 2322 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2323 The size of this array is equal to the number of local rows, i.e 'm'. 2324 You must leave room for the diagonal entry even if it is zero. 2325 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2326 submatrix (same value is used for all local rows). 2327 - o_nnz - array containing the number of nonzeros in the various rows of the 2328 OFF-DIAGONAL portion of the local submatrix (possibly different for 2329 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2330 structure. The size of this array is equal to the number 2331 of local rows, i.e 'm'. 2332 2333 If the *_nnz parameter is given then the *_nz parameter is ignored 2334 2335 The AIJ format (also called the Yale sparse matrix format or 2336 compressed row storage (CSR)), is fully compatible with standard Fortran 77 2337 storage. The stored row and column indices begin with zero. See the users manual for details. 2338 2339 The parallel matrix is partitioned such that the first m0 rows belong to 2340 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2341 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2342 2343 The DIAGONAL portion of the local submatrix of a processor can be defined 2344 as the submatrix which is obtained by extraction the part corresponding 2345 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2346 first row that belongs to the processor, and r2 is the last row belonging 2347 to the this processor. This is a square mxm matrix. The remaining portion 2348 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2349 2350 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 
2351 2352 Example usage: 2353 2354 Consider the following 8x8 matrix with 34 non-zero values that is 2355 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2356 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2357 as follows: 2358 2359 .vb 2360 1 2 0 | 0 3 0 | 0 4 2361 Proc0 0 5 6 | 7 0 0 | 8 0 2362 9 0 10 | 11 0 0 | 12 0 2363 ------------------------------------- 2364 13 0 14 | 15 16 17 | 0 0 2365 Proc1 0 18 0 | 19 20 21 | 0 0 2366 0 0 0 | 22 23 0 | 24 0 2367 ------------------------------------- 2368 Proc2 25 26 27 | 0 0 28 | 29 0 2369 30 0 0 | 31 32 33 | 0 34 2370 .ve 2371 2372 This can be represented as a collection of submatrices as: 2373 2374 .vb 2375 A B C 2376 D E F 2377 G H I 2378 .ve 2379 2380 Here the submatrices A,B,C are owned by proc0, D,E,F are 2381 owned by proc1, and G,H,I are owned by proc2. 2382 2383 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2384 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2385 The 'M','N' parameters are 8,8, and have the same values on all procs. 2386 2387 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2388 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2389 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2390 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 2391 part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ 2392 matrix and [DF] as another SeqAIJ matrix. 2393 2394 When the d_nz, o_nz parameters are specified, d_nz storage elements are 2395 allocated for every row of the local diagonal submatrix, and o_nz 2396 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2397 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 2398 local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively. 2399 In this case, the values of d_nz,o_nz are: 2400 .vb 2401 proc0 : d_nz = 2, o_nz = 2 2402 proc1 : d_nz = 3, o_nz = 2 2403 proc2 : d_nz = 1, o_nz = 4 2404 .ve 2405 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2406 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 2407 for proc2, i.e. we are using 12+15+10=37 storage locations to store 2408 34 values. 2409 2410 When the d_nnz, o_nnz parameters are specified, the storage is specified 2411 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 2412 In the above case the values for d_nnz,o_nnz are: 2413 .vb 2414 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2415 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2416 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2417 .ve 2418 Here the space allocated is the sum of all of the above values, i.e. 34, and 2419 hence the preallocation is exact.
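   A matching call sequence for proc0 in the example above might look like the
   following sketch (error checking and the assembly calls are omitted; the
   arrays simply repeat the per-row counts listed above):

.vb
   PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
   ierr = MatCreate(comm,3,3,8,8,&A);
   ierr = MatSetType(A,MATMPIAIJ);
   ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve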
2420 2421 Level: intermediate 2422 2423 .keywords: matrix, aij, compressed row, sparse, parallel 2424 2425 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 2426 MPIAIJ 2427 @*/ 2428 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2429 { 2430 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2431 2432 PetscFunctionBegin; 2433 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2434 if (f) { 2435 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2436 } 2437 PetscFunctionReturn(0); 2438 } 2439 2440 #undef __FUNCT__ 2441 #define __FUNCT__ "MatCreateMPIAIJ" 2442 /*@C 2443 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2444 (the default parallel PETSc format). For good matrix assembly performance 2445 the user should preallocate the matrix storage by setting the parameters 2446 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2447 performance can be increased by more than a factor of 50. 2448 2449 Collective on MPI_Comm 2450 2451 Input Parameters: 2452 + comm - MPI communicator 2453 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2454 This value should be the same as the local size used in creating the 2455 y vector for the matrix-vector product y = Ax. 2456 . n - This value should be the same as the local size used in creating the 2457 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2458 calculated if N is given) For square matrices n is almost always m. 2459 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2460 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2461 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2462 (same value is used for all local rows) 2463 . d_nnz - array containing the number of nonzeros in the various rows of the 2464 DIAGONAL portion of the local submatrix (possibly different for each row) 2465 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2466 The size of this array is equal to the number of local rows, i.e 'm'. 2467 You must leave room for the diagonal entry even if it is zero. 2468 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2469 submatrix (same value is used for all local rows). 2470 - o_nnz - array containing the number of nonzeros in the various rows of the 2471 OFF-DIAGONAL portion of the local submatrix (possibly different for 2472 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2473 structure. The size of this array is equal to the number 2474 of local rows, i.e 'm'. 2475 2476 Output Parameter: 2477 . A - the matrix 2478 2479 Notes: 2480 If the *_nnz parameter is given then the *_nz parameter is ignored 2481 2482 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2483 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2484 storage requirements for this matrix. 2485 2486 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2487 processor than it must be used on all processors that share the object for 2488 that argument. 2489 2490 The user MUST specify either the local or global matrix dimensions 2491 (possibly both). 
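   For instance, a minimal sketch that lets PETSc choose the local sizes (the
   global size 100 and the per-row estimates 5 and 2 are illustrative only):

.vb
   ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,
                          5,PETSC_NULL,2,PETSC_NULL,&A);
.ve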
2492 2493 The parallel matrix is partitioned such that the first m0 rows belong to 2494 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2495 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2496 2497 The DIAGONAL portion of the local submatrix of a processor can be defined 2498 as the submatrix which is obtained by extraction the part corresponding 2499 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2500 first row that belongs to the processor, and r2 is the last row belonging 2501 to the this processor. This is a square mxm matrix. The remaining portion 2502 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2503 2504 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2505 2506 When calling this routine with a single process communicator, a matrix of 2507 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2508 type of communicator, use the construction mechanism: 2509 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 2510 2511 By default, this format uses inodes (identical nodes) when possible. 2512 We search for consecutive rows with the same nonzero structure, thereby 2513 reusing matrix information to achieve increased efficiency. 2514 2515 Options Database Keys: 2516 + -mat_no_inode - Do not use inodes 2517 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 2518 - -mat_aij_oneindex - Internally use indexing starting at 1 2519 rather than 0. Note that when calling MatSetValues(), 2520 the user still MUST index entries starting at 0! 2521 2522 2523 Example usage: 2524 2525 Consider the following 8x8 matrix with 34 non-zero values, that is 2526 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2527 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2528 as follows: 2529 2530 .vb 2531 1 2 0 | 0 3 0 | 0 4 2532 Proc0 0 5 6 | 7 0 0 | 8 0 2533 9 0 10 | 11 0 0 | 12 0 2534 ------------------------------------- 2535 13 0 14 | 15 16 17 | 0 0 2536 Proc1 0 18 0 | 19 20 21 | 0 0 2537 0 0 0 | 22 23 0 | 24 0 2538 ------------------------------------- 2539 Proc2 25 26 27 | 0 0 28 | 29 0 2540 30 0 0 | 31 32 33 | 0 34 2541 .ve 2542 2543 This can be represented as a collection of submatrices as: 2544 2545 .vb 2546 A B C 2547 D E F 2548 G H I 2549 .ve 2550 2551 Where the submatrices A,B,C are owned by proc0, D,E,F are 2552 owned by proc1, G,H,I are owned by proc2. 2553 2554 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2555 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2556 The 'M','N' parameters are 8,8, and have the same values on all procs. 2557 2558 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2559 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2560 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2561 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2562 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2563 matrix, ans [DF] as another SeqAIJ matrix. 2564 2565 When d_nz, o_nz parameters are specified, d_nz storage elements are 2566 allocated for every row of the local diagonal submatrix, and o_nz 2567 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2568 One way to choose d_nz and o_nz is to use the max nonzerors per local 2569 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 
2570 In this case, the values of d_nz,o_nz are: 2571 .vb 2572 proc0 : dnz = 2, o_nz = 2 2573 proc1 : dnz = 3, o_nz = 2 2574 proc2 : dnz = 1, o_nz = 4 2575 .ve 2576 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2577 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2578 for proc3. i.e we are using 12+15+10=37 storage locations to store 2579 34 values. 2580 2581 When d_nnz, o_nnz parameters are specified, the storage is specified 2582 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2583 In the above case the values for d_nnz,o_nnz are: 2584 .vb 2585 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2586 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2587 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2588 .ve 2589 Here the space allocated is sum of all the above values i.e 34, and 2590 hence pre-allocation is perfect. 2591 2592 Level: intermediate 2593 2594 .keywords: matrix, aij, compressed row, sparse, parallel 2595 2596 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2597 MPIAIJ 2598 @*/ 2599 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2600 { 2601 PetscErrorCode ierr; 2602 PetscMPIInt size; 2603 2604 PetscFunctionBegin; 2605 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2606 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2607 if (size > 1) { 2608 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2609 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2610 } else { 2611 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2612 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2613 } 2614 PetscFunctionReturn(0); 2615 } 2616 2617 #undef __FUNCT__ 2618 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2619 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2620 { 2621 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2622 2623 PetscFunctionBegin; 2624 *Ad = a->A; 2625 *Ao = a->B; 2626 *colmap = a->garray; 2627 PetscFunctionReturn(0); 2628 } 2629 2630 #undef __FUNCT__ 2631 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2632 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2633 { 2634 PetscErrorCode ierr; 2635 PetscInt i; 2636 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2637 2638 PetscFunctionBegin; 2639 if (coloring->ctype == IS_COLORING_LOCAL) { 2640 ISColoringValue *allcolors,*colors; 2641 ISColoring ocoloring; 2642 2643 /* set coloring for diagonal portion */ 2644 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2645 2646 /* set coloring for off-diagonal portion */ 2647 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2648 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2649 for (i=0; i<a->B->n; i++) { 2650 colors[i] = allcolors[a->garray[i]]; 2651 } 2652 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2653 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2654 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2655 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2656 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2657 ISColoringValue *colors; 2658 PetscInt *larray; 2659 ISColoring ocoloring; 2660 2661 /* set coloring for diagonal portion */ 2662 ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2663 for (i=0; 
i<a->A->n; i++) { 2664 larray[i] = i + a->cstart; 2665 } 2666 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2667 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2668 for (i=0; i<a->A->n; i++) { 2669 colors[i] = coloring->colors[larray[i]]; 2670 } 2671 ierr = PetscFree(larray);CHKERRQ(ierr); 2672 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2673 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2674 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2675 2676 /* set coloring for off-diagonal portion */ 2677 ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2678 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2679 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2680 for (i=0; i<a->B->n; i++) { 2681 colors[i] = coloring->colors[larray[i]]; 2682 } 2683 ierr = PetscFree(larray);CHKERRQ(ierr); 2684 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2685 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2686 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2687 } else { 2688 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 2689 } 2690 2691 PetscFunctionReturn(0); 2692 } 2693 2694 #if defined(PETSC_HAVE_ADIC) 2695 #undef __FUNCT__ 2696 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2697 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2698 { 2699 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2700 PetscErrorCode ierr; 2701 2702 PetscFunctionBegin; 2703 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2704 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2705 PetscFunctionReturn(0); 2706 } 2707 #endif 2708 2709 #undef __FUNCT__ 2710 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2711 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 2712 { 2713 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2714 PetscErrorCode ierr; 2715 2716 PetscFunctionBegin; 2717 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2718 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2719 PetscFunctionReturn(0); 2720 } 2721 2722 #undef __FUNCT__ 2723 #define __FUNCT__ "MatMerge" 2724 /*@C 2725 MatMerge - Creates a single large PETSc matrix by concatenating sequential 2726 matrices from each processor 2727 2728 Collective on MPI_Comm 2729 2730 Input Parameters: 2731 + comm - the communicator the parallel matrix will live on 2732 . inmat - the input sequential matrix on this process 2733 . n - number of local columns (or PETSC_DECIDE) 2734 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2735 2736 Output Parameter: 2737 . outmat - the parallel matrix generated 2738 2739 Level: advanced 2740 2741 Notes: The number of columns of the matrix on EACH processor MUST be the same.
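   A minimal usage sketch (localmat is an illustrative name for the sequential
   matrix already held by each process; error checking is omitted):

.vb
   ierr = MatMerge(PETSC_COMM_WORLD,localmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);
.ve

   Note that, as the code below shows, MatDestroy() is called on inmat as part of
   the merge, so the input matrix should not be used afterwards.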
2742 2743 @*/ 2744 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2745 { 2746 PetscErrorCode ierr; 2747 PetscInt m,N,i,rstart,nnz,I,*dnz,*onz; 2748 PetscInt *indx; 2749 PetscScalar *values; 2750 PetscMap columnmap,rowmap; 2751 2752 PetscFunctionBegin; 2753 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2754 /* 2755 PetscMPIInt rank; 2756 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2757 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N); 2758 */ 2759 if (scall == MAT_INITIAL_MATRIX){ 2760 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2761 if (n == PETSC_DECIDE){ 2762 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2763 ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr); 2764 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2765 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2766 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2767 } 2768 2769 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2770 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2771 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2772 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2773 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2774 2775 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2776 for (i=0;i<m;i++) { 2777 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2778 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2779 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2780 } 2781 /* This routine will ONLY return MPIAIJ type matrix */ 2782 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr); 2783 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2784 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2785 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2786 2787 } else if (scall == MAT_REUSE_MATRIX){ 2788 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2789 } else { 2790 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 2791 } 2792 2793 for (i=0;i<m;i++) { 2794 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2795 I = i + rstart; 2796 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2797 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2798 } 2799 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2800 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2801 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2802 2803 PetscFunctionReturn(0); 2804 } 2805 2806 #undef __FUNCT__ 2807 #define __FUNCT__ "MatFileSplit" 2808 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2809 { 2810 PetscErrorCode ierr; 2811 PetscMPIInt rank; 2812 PetscInt m,N,i,rstart,nnz; 2813 size_t len; 2814 const PetscInt *indx; 2815 PetscViewer out; 2816 char *name; 2817 Mat B; 2818 const PetscScalar *values; 2819 2820 PetscFunctionBegin; 2821 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2822 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2823 /* Should this be the type of the diagonal block of A? 
*/ 2824 ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr); 2825 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2826 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2827 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2828 for (i=0;i<m;i++) { 2829 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2830 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2831 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2832 } 2833 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2834 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2835 2836 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2837 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2838 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2839 sprintf(name,"%s.%d",outfile,rank); 2840 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr); 2841 ierr = PetscFree(name); 2842 ierr = MatView(B,out);CHKERRQ(ierr); 2843 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2844 ierr = MatDestroy(B);CHKERRQ(ierr); 2845 PetscFunctionReturn(0); 2846 } 2847 2848 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 2849 #undef __FUNCT__ 2850 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 2851 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 2852 { 2853 PetscErrorCode ierr; 2854 Mat_Merge_SeqsToMPI *merge; 2855 PetscObjectContainer container; 2856 2857 PetscFunctionBegin; 2858 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2859 if (container) { 2860 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2861 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 2862 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 2863 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 2864 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 2865 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 2866 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 2867 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 2868 ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr); 2869 if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);} 2870 if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);} 2871 if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);} 2872 2873 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 2874 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 2875 } 2876 ierr = PetscFree(merge);CHKERRQ(ierr); 2877 2878 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 2879 PetscFunctionReturn(0); 2880 } 2881 2882 #include "src/mat/utils/freespace.h" 2883 #include "petscbt.h" 2884 static PetscEvent logkey_seqstompinum = 0; 2885 #undef __FUNCT__ 2886 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 2887 /*@C 2888 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 2889 matrices from each processor 2890 2891 Collective on MPI_Comm 2892 2893 Input Parameters: 2894 + comm - the communicators the parallel matrix will live on 2895 . seqmat - the input sequential matrices 2896 . m - number of local rows (or PETSC_DECIDE) 2897 . n - number of local columns (or PETSC_DECIDE) 2898 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2899 2900 Output Parameter: 2901 . mpimat - the parallel matrix generated 2902 2903 Level: advanced 2904 2905 Notes: 2906 The dimensions of the sequential matrix in each processor MUST be the same. 
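   A typical call sequence (a sketch only; error checking is omitted, and m,n are
   the desired local sizes or PETSC_DECIDE as described above) pairs the symbolic
   and numeric phases defined in this file:

.vb
   ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&mpimat);
   ierr = MatMerge_SeqsToMPINumeric(seqmat,mpimat);
.ve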
2907 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 2908 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 2909 @*/ 2910 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 2911 { 2912 PetscErrorCode ierr; 2913 MPI_Comm comm=mpimat->comm; 2914 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2915 PetscMPIInt size,rank,taga,*len_s; 2916 PetscInt N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j; 2917 PetscInt proc,m; 2918 PetscInt **buf_ri,**buf_rj; 2919 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 2920 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 2921 MPI_Request *s_waits,*r_waits; 2922 MPI_Status *status; 2923 MatScalar *aa=a->a,**abuf_r,*ba_i; 2924 Mat_Merge_SeqsToMPI *merge; 2925 PetscObjectContainer container; 2926 2927 PetscFunctionBegin; 2928 if (!logkey_seqstompinum) { 2929 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 2930 } 2931 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 2932 2933 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2934 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2935 2936 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2937 if (container) { 2938 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2939 } 2940 bi = merge->bi; 2941 bj = merge->bj; 2942 buf_ri = merge->buf_ri; 2943 buf_rj = merge->buf_rj; 2944 2945 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 2946 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 2947 len_s = merge->len_s; 2948 2949 /* send and recv matrix values */ 2950 /*-----------------------------*/ 2951 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 2952 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 2953 2954 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 2955 for (proc=0,k=0; proc<size; proc++){ 2956 if (!len_s[proc]) continue; 2957 i = owners[proc]; 2958 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 2959 k++; 2960 } 2961 2962 ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr); 2963 ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr); 2964 ierr = PetscFree(status);CHKERRQ(ierr); 2965 2966 ierr = PetscFree(s_waits);CHKERRQ(ierr); 2967 ierr = PetscFree(r_waits);CHKERRQ(ierr); 2968 2969 /* insert mat values of mpimat */ 2970 /*----------------------------*/ 2971 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 2972 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 2973 nextrow = buf_ri_k + merge->nrecv; 2974 nextai = nextrow + merge->nrecv; 2975 2976 for (k=0; k<merge->nrecv; k++){ 2977 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 2978 nrows = *(buf_ri_k[k]); 2979 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 2980 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 2981 } 2982 2983 /* set values of ba */ 2984 ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); 2985 for (i=0; i<m; i++) { 2986 arow = owners[rank] + i; 2987 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 2988 bnzi = bi[i+1] - bi[i]; 2989 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 2990 2991 /* add local non-zero vals of 
this proc's seqmat into ba */ 2992 anzi = ai[arow+1] - ai[arow]; 2993 aj = a->j + ai[arow]; 2994 aa = a->a + ai[arow]; 2995 nextaj = 0; 2996 for (j=0; nextaj<anzi; j++){ 2997 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 2998 ba_i[j] += aa[nextaj++]; 2999 } 3000 } 3001 3002 /* add received vals into ba */ 3003 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3004 /* i-th row */ 3005 if (i == *nextrow[k]) { 3006 anzi = *(nextai[k]+1) - *nextai[k]; 3007 aj = buf_rj[k] + *(nextai[k]); 3008 aa = abuf_r[k] + *(nextai[k]); 3009 nextaj = 0; 3010 for (j=0; nextaj<anzi; j++){ 3011 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3012 ba_i[j] += aa[nextaj++]; 3013 } 3014 } 3015 nextrow[k]++; nextai[k]++; 3016 } 3017 } 3018 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3019 } 3020 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3021 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3022 3023 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3024 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3025 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3026 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3027 PetscFunctionReturn(0); 3028 } 3029 3030 static PetscEvent logkey_seqstompisym = 0; 3031 #undef __FUNCT__ 3032 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 3033 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3034 { 3035 PetscErrorCode ierr; 3036 Mat B_mpi; 3037 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3038 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3039 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3040 PetscInt M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j; 3041 PetscInt len,proc,*dnz,*onz; 3042 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3043 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3044 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3045 MPI_Status *status; 3046 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3047 PetscBT lnkbt; 3048 Mat_Merge_SeqsToMPI *merge; 3049 PetscObjectContainer container; 3050 3051 PetscFunctionBegin; 3052 if (!logkey_seqstompisym) { 3053 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3054 } 3055 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3056 3057 /* make sure it is a PETSc comm */ 3058 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 3059 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3060 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3061 3062 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3063 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3064 3065 /* determine row ownership */ 3066 /*---------------------------------------------------------*/ 3067 ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr); 3068 if (m == PETSC_DECIDE) { 3069 ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr); 3070 } else { 3071 ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 3072 } 3073 ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr); 3074 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3075 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3076 3077 if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); } 3078 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 3079 3080 /* determine the number of messages to send, their 
lengths */ 3081 /*---------------------------------------------------------*/ 3082 len_s = merge->len_s; 3083 3084 len = 0; /* length of buf_si[] */ 3085 merge->nsend = 0; 3086 for (proc=0; proc<size; proc++){ 3087 len_si[proc] = 0; 3088 if (proc == rank){ 3089 len_s[proc] = 0; 3090 } else { 3091 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3092 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3093 } 3094 if (len_s[proc]) { 3095 merge->nsend++; 3096 nrows = 0; 3097 for (i=owners[proc]; i<owners[proc+1]; i++){ 3098 if (ai[i+1] > ai[i]) nrows++; 3099 } 3100 len_si[proc] = 2*(nrows+1); 3101 len += len_si[proc]; 3102 } 3103 } 3104 3105 /* determine the number and length of messages to receive for ij-structure */ 3106 /*-------------------------------------------------------------------------*/ 3107 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3108 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3109 3110 /* post the Irecv of j-structure */ 3111 /*-------------------------------*/ 3112 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr); 3113 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3114 3115 /* post the Isend of j-structure */ 3116 /*--------------------------------*/ 3117 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3118 sj_waits = si_waits + merge->nsend; 3119 3120 for (proc=0, k=0; proc<size; proc++){ 3121 if (!len_s[proc]) continue; 3122 i = owners[proc]; 3123 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3124 k++; 3125 } 3126 3127 /* receives and sends of j-structure are complete */ 3128 /*------------------------------------------------*/ 3129 ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr); 3130 ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr); 3131 3132 /* send and recv i-structure */ 3133 /*---------------------------*/ 3134 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr); 3135 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3136 3137 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3138 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3139 for (proc=0,k=0; proc<size; proc++){ 3140 if (!len_s[proc]) continue; 3141 /* form outgoing message for i-structure: 3142 buf_si[0]: nrows to be sent 3143 [1:nrows]: row index (global) 3144 [nrows+1:2*nrows+1]: i-structure index 3145 */ 3146 /*-------------------------------------------*/ 3147 nrows = len_si[proc]/2 - 1; 3148 buf_si_i = buf_si + nrows+1; 3149 buf_si[0] = nrows; 3150 buf_si_i[0] = 0; 3151 nrows = 0; 3152 for (i=owners[proc]; i<owners[proc+1]; i++){ 3153 anzi = ai[i+1] - ai[i]; 3154 if (anzi) { 3155 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3156 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3157 nrows++; 3158 } 3159 } 3160 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3161 k++; 3162 buf_si += len_si[proc]; 3163 } 3164 3165 ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr); 3166 ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr); 3167 3168 ierr = PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: 
%D\n",merge->nsend,merge->nrecv));CHKERRQ(ierr); 3169 for (i=0; i<merge->nrecv; i++){ 3170 ierr = PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI: recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));CHKERRQ(ierr); 3171 } 3172 3173 ierr = PetscFree(len_si);CHKERRQ(ierr); 3174 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3175 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 3176 ierr = PetscFree(si_waits);CHKERRQ(ierr); 3177 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3178 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3179 ierr = PetscFree(status);CHKERRQ(ierr); 3180 3181 /* compute a local seq matrix in each processor */ 3182 /*----------------------------------------------*/ 3183 /* allocate bi array and free space for accumulating nonzero column info */ 3184 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3185 bi[0] = 0; 3186 3187 /* create and initialize a linked list */ 3188 nlnk = N+1; 3189 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3190 3191 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3192 len = 0; 3193 len = ai[owners[rank+1]] - ai[owners[rank]]; 3194 ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3195 current_space = free_space; 3196 3197 /* determine symbolic info for each local row */ 3198 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3199 nextrow = buf_ri_k + merge->nrecv; 3200 nextai = nextrow + merge->nrecv; 3201 for (k=0; k<merge->nrecv; k++){ 3202 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3203 nrows = *buf_ri_k[k]; 3204 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 3205 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3206 } 3207 3208 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3209 len = 0; 3210 for (i=0;i<m;i++) { 3211 bnzi = 0; 3212 /* add local non-zero cols of this proc's seqmat into lnk */ 3213 arow = owners[rank] + i; 3214 anzi = ai[arow+1] - ai[arow]; 3215 aj = a->j + ai[arow]; 3216 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3217 bnzi += nlnk; 3218 /* add received col data into lnk */ 3219 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3220 if (i == *nextrow[k]) { /* i-th row */ 3221 anzi = *(nextai[k]+1) - *nextai[k]; 3222 aj = buf_rj[k] + *nextai[k]; 3223 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3224 bnzi += nlnk; 3225 nextrow[k]++; nextai[k]++; 3226 } 3227 } 3228 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3229 3230 /* if free space is not available, make more free space */ 3231 if (current_space->local_remaining<bnzi) { 3232 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3233 nspacedouble++; 3234 } 3235 /* copy data into free space, then initialize lnk */ 3236 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3237 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3238 3239 current_space->array += bnzi; 3240 current_space->local_used += bnzi; 3241 current_space->local_remaining -= bnzi; 3242 3243 bi[i+1] = bi[i] + bnzi; 3244 } 3245 3246 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3247 3248 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3249 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3250 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3251 3252 /* create symbolic parallel matrix B_mpi */ 3253 /*---------------------------------------*/ 3254 if 
(n==PETSC_DECIDE) { 3255 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr); 3256 } else { 3257 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr); 3258 } 3259 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3260 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3261 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3262 3263 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3264 B_mpi->assembled = PETSC_FALSE; 3265 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3266 merge->bi = bi; 3267 merge->bj = bj; 3268 merge->buf_ri = buf_ri; 3269 merge->buf_rj = buf_rj; 3270 merge->coi = PETSC_NULL; 3271 merge->coj = PETSC_NULL; 3272 merge->owners_co = PETSC_NULL; 3273 3274 /* attach the supporting struct to B_mpi for reuse */ 3275 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3276 ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr); 3277 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3278 *mpimat = B_mpi; 3279 3280 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 3281 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3282 PetscFunctionReturn(0); 3283 } 3284 3285 static PetscEvent logkey_seqstompi = 0; 3286 #undef __FUNCT__ 3287 #define __FUNCT__ "MatMerge_SeqsToMPI" 3288 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3289 { 3290 PetscErrorCode ierr; 3291 3292 PetscFunctionBegin; 3293 if (!logkey_seqstompi) { 3294 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3295 } 3296 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3297 if (scall == MAT_INITIAL_MATRIX){ 3298 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3299 } 3300 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3301 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3302 PetscFunctionReturn(0); 3303 } 3304 static PetscEvent logkey_getlocalmat = 0; 3305 #undef __FUNCT__ 3306 #define __FUNCT__ "MatGetLocalMat" 3307 /*@C 3308 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3309 3310 Not Collective 3311 3312 Input Parameters: 3313 + A - the matrix 3314 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3315 3316 Output Parameter: 3317 . 
A_loc - the local sequential matrix generated 3318 3319 Level: developer 3320 3321 @*/ 3322 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3323 { 3324 PetscErrorCode ierr; 3325 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3326 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3327 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3328 PetscScalar *aa=a->a,*ba=b->a,*ca; 3329 PetscInt am=A->m,i,j,k,cstart=mpimat->cstart; 3330 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3331 3332 PetscFunctionBegin; 3333 if (!logkey_getlocalmat) { 3334 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3335 } 3336 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3337 if (scall == MAT_INITIAL_MATRIX){ 3338 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 3339 ci[0] = 0; 3340 for (i=0; i<am; i++){ 3341 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3342 } 3343 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3344 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 3345 k = 0; 3346 for (i=0; i<am; i++) { 3347 ncols_o = bi[i+1] - bi[i]; 3348 ncols_d = ai[i+1] - ai[i]; 3349 /* off-diagonal portion of A */ 3350 for (jo=0; jo<ncols_o; jo++) { 3351 col = cmap[*bj]; 3352 if (col >= cstart) break; 3353 cj[k] = col; bj++; 3354 ca[k++] = *ba++; 3355 } 3356 /* diagonal portion of A */ 3357 for (j=0; j<ncols_d; j++) { 3358 cj[k] = cstart + *aj++; 3359 ca[k++] = *aa++; 3360 } 3361 /* off-diagonal portion of A */ 3362 for (j=jo; j<ncols_o; j++) { 3363 cj[k] = cmap[*bj++]; 3364 ca[k++] = *ba++; 3365 } 3366 } 3367 /* put together the new matrix */ 3368 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 3369 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3370 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3371 mat = (Mat_SeqAIJ*)(*A_loc)->data; 3372 mat->freedata = PETSC_TRUE; 3373 mat->nonew = 0; 3374 } else if (scall == MAT_REUSE_MATRIX){ 3375 mat=(Mat_SeqAIJ*)(*A_loc)->data; 3376 ci = mat->i; cj = mat->j; ca = mat->a; 3377 for (i=0; i<am; i++) { 3378 /* off-diagonal portion of A */ 3379 ncols_o = bi[i+1] - bi[i]; 3380 for (jo=0; jo<ncols_o; jo++) { 3381 col = cmap[*bj]; 3382 if (col >= cstart) break; 3383 *ca++ = *ba++; bj++; 3384 } 3385 /* diagonal portion of A */ 3386 ncols_d = ai[i+1] - ai[i]; 3387 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 3388 /* off-diagonal portion of A */ 3389 for (j=jo; j<ncols_o; j++) { 3390 *ca++ = *ba++; bj++; 3391 } 3392 } 3393 } else { 3394 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3395 } 3396 3397 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3398 PetscFunctionReturn(0); 3399 } 3400 3401 static PetscEvent logkey_getlocalmatcondensed = 0; 3402 #undef __FUNCT__ 3403 #define __FUNCT__ "MatGetLocalMatCondensed" 3404 /*@C 3405 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 3406 3407 Not Collective 3408 3409 Input Parameters: 3410 + A - the matrix 3411 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3412 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 3413 3414 Output Parameter: 3415 . 
A_loc - the local sequential matrix generated 3416 3417 Level: developer 3418 3419 @*/ 3420 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 3421 { 3422 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3423 PetscErrorCode ierr; 3424 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 3425 IS isrowa,iscola; 3426 Mat *aloc; 3427 3428 PetscFunctionBegin; 3429 if (!logkey_getlocalmatcondensed) { 3430 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE); 3431 } 3432 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3433 if (!row){ 3434 start = a->rstart; end = a->rend; 3435 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 3436 } else { 3437 isrowa = *row; 3438 } 3439 if (!col){ 3440 start = a->cstart; 3441 cmap = a->garray; 3442 nzA = a->A->n; 3443 nzB = a->B->n; 3444 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3445 ncols = 0; 3446 for (i=0; i<nzB; i++) { 3447 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3448 else break; 3449 } 3450 imark = i; 3451 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 3452 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 3453 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 3454 ierr = PetscFree(idx);CHKERRQ(ierr); 3455 } else { 3456 iscola = *col; 3457 } 3458 if (scall != MAT_INITIAL_MATRIX){ 3459 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 3460 aloc[0] = *A_loc; 3461 } 3462 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 3463 *A_loc = aloc[0]; 3464 ierr = PetscFree(aloc);CHKERRQ(ierr); 3465 if (!row){ 3466 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 3467 } 3468 if (!col){ 3469 ierr = ISDestroy(iscola);CHKERRQ(ierr); 3470 } 3471 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3472 PetscFunctionReturn(0); 3473 } 3474
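/*
   A minimal usage sketch for MatGetLocalMatCondensed() (illustration only; it assumes
   A is an already assembled MATMPIAIJ matrix, and passes PETSC_NULL so the routine
   builds the row and column index sets itself):

      Mat A_loc;
      ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
      ... use A_loc, a MATSEQAIJ matrix holding the local rows of A and their nonzero columns ...
      ierr = MatGetLocalMatCondensed(A,MAT_REUSE_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
      ierr = MatDestroy(A_loc);CHKERRQ(ierr);

   The MAT_REUSE_MATRIX call refills the previously created A_loc after the numerical
   values of A have changed but its nonzero pattern has not.
*/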
3475 static PetscEvent logkey_GetBrowsOfAcols = 0; 3476 #undef __FUNCT__ 3477 #define __FUNCT__ "MatGetBrowsOfAcols" 3478 /*@C 3479 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A 3480 3481 Collective on Mat 3482 3483 Input Parameters: 3484 + A,B - the matrices in mpiaij format 3485 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3486 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 3487 3488 Output Parameter: 3489 + rowb, colb - index sets of rows and columns of B to extract 3490 . brstart - row index of B_seq at which B's locally owned rows begin 3491 - B_seq - the sequential matrix generated 3492 3493 Level: developer 3494 3495 @*/ 3496 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 3497 { 3498 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3499 PetscErrorCode ierr; 3500 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 3501 IS isrowb,iscolb; 3502 Mat *bseq; 3503 3504 PetscFunctionBegin; 3505 if (a->cstart != b->rstart || a->cend != b->rend){ 3506 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend); 3507 } 3508 if (!logkey_GetBrowsOfAcols) { 3509 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 3510 } 3511 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3512 3513 if (scall == MAT_INITIAL_MATRIX){ 3514 start = a->cstart; 3515 cmap = a->garray; 3516 nzA = a->A->n; 3517 nzB = a->B->n; 3518 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3519 ncols = 0; 3520 for (i=0; i<nzB; i++) { /* row < local row index */ 3521 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3522 else break; 3523 } 3524 imark = i; 3525 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 3526 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 3527 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 3528 ierr = PetscFree(idx);CHKERRQ(ierr); 3529 *brstart = imark; 3530 ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr); 3531 } else { 3532 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 3533 isrowb = *rowb; iscolb = *colb; 3534 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 3535 bseq[0] = *B_seq; 3536 } 3537 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 3538 *B_seq = bseq[0]; 3539 ierr = PetscFree(bseq);CHKERRQ(ierr); 3540 if (!rowb){ 3541 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 3542 } else { 3543 *rowb = isrowb; 3544 } 3545 if (!colb){ 3546 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 3547 } else { 3548 *colb = iscolb; 3549 } 3550 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3551 PetscFunctionReturn(0); 3552 } 3553
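/*
   A minimal usage sketch for MatGetBrowsOfAcols() (illustration only; A and B are
   assumed to be assembled MATMPIAIJ matrices whose local column/row layouts match,
   as checked at the top of the routine):

      IS       rowb = PETSC_NULL,colb = PETSC_NULL;
      PetscInt brstart;
      Mat      B_seq;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
      ... use B_seq; rowb and colb are returned so the extraction can be repeated ...
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
      ierr = ISDestroy(rowb);CHKERRQ(ierr);
      ierr = ISDestroy(colb);CHKERRQ(ierr);
      ierr = MatDestroy(B_seq);CHKERRQ(ierr);
*/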
3554 static PetscEvent logkey_GetBrowsOfAocols = 0; 3555 #undef __FUNCT__ 3556 #define __FUNCT__ "MatGetBrowsOfAoCols" 3557 /*@C 3558 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns 3559 of the OFF-DIAGONAL portion of local A 3560 3561 Collective on Mat 3562 3563 Input Parameters: 3564 + A,B - the matrices in mpiaij format 3565 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3566 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 3567 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 3568 3569 Output Parameter: 3570 . B_oth - the sequential matrix generated 3571 3572 Level: developer 3573 3574 @*/ 3575 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 3576 { 3577 VecScatter_MPI_General *gen_to,*gen_from; 3578 PetscErrorCode ierr; 3579 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3580 Mat_SeqAIJ *b_oth; 3581 VecScatter ctx=a->Mvctx; 3582 MPI_Comm comm=ctx->comm; 3583 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank; 3584 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj; 3585 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 3586 PetscInt i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 3587 MPI_Request *rwaits,*swaits; 3588 MPI_Status *sstatus,rstatus; 3589 PetscInt *cols; 3590 PetscScalar *vals; 3591 PetscMPIInt j; 3592 3593 PetscFunctionBegin; 3594 if (a->cstart != b->rstart || a->cend != b->rend){ 3595 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend); 3596 } 3597 if (!logkey_GetBrowsOfAocols) { 3598 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE); 3599 } 3600 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3601 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3602 3603 gen_to = (VecScatter_MPI_General*)ctx->todata; 3604 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 3605 rvalues = gen_from->values; /* holds the lengths of the rows to be received */ 3606 svalues = gen_to->values; /* holds the lengths of the rows to be sent */ 3607 nrecvs = gen_from->n; 3608 nsends = gen_to->n; 3609 rwaits = gen_from->requests; 3610 swaits = gen_to->requests; 3611 srow = gen_to->indices; /* local row index to be sent */ 3612 rstarts = gen_from->starts; 3613 sstarts = gen_to->starts; 3614 rprocs = gen_from->procs; 3615 sprocs = gen_to->procs; 3616 sstatus = gen_to->sstatus; 3617 3618 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 3619 if (scall == MAT_INITIAL_MATRIX){ 3620 /* i-array */ 3621 /*---------*/ 3622 /* post receives */ 3623 for (i=0; i<nrecvs; i++){ 3624 rowlen = (PetscInt*)rvalues + rstarts[i]; 3625 nrows = rstarts[i+1]-rstarts[i]; 3626 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3627 } 3628 3629 /* pack the outgoing message */ 3630 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 3631 rstartsj = sstartsj + nsends +1; 3632 sstartsj[0] = 0; rstartsj[0] = 0; 3633 len = 0; /* total length of j or a array to be sent */ 3634 k = 0; 3635 for (i=0; i<nsends; i++){ 3636 rowlen = (PetscInt*)svalues + sstarts[i]; 3637 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3638 for (j=0; j<nrows; j++) { 3639 row = srow[k] + b->rowners[rank]; /* global row idx */ 3640 ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 3641 len += rowlen[j]; 3642 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 3643 k++; 3644 } 3645 ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3646 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 3647 } 3648 /* recvs and sends of i-array are completed */ 3649 i = nrecvs; 3650 while (i--) { 3651 ierr = 
MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3652 } 3653 if (nsends) { 3654 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr); 3655 } 3656 /* allocate buffers for sending j and a arrays */ 3657 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 3658 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 3659 3660 /* create i-array of B_oth */ 3661 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 3662 b_othi[0] = 0; 3663 len = 0; /* total length of j or a array to be received */ 3664 k = 0; 3665 for (i=0; i<nrecvs; i++){ 3666 rowlen = (PetscInt*)rvalues + rstarts[i]; 3667 nrows = rstarts[i+1]-rstarts[i]; 3668 for (j=0; j<nrows; j++) { 3669 b_othi[k+1] = b_othi[k] + rowlen[j]; 3670 len += rowlen[j]; k++; 3671 } 3672 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 3673 } 3674 3675 /* allocate space for j and a arrays of B_oth */ 3676 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 3677 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr); 3678 3679 /* j-array */ 3680 /*---------*/ 3681 /* post receives of j-array */ 3682 for (i=0; i<nrecvs; i++){ 3683 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3684 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3685 } 3686 k = 0; 3687 for (i=0; i<nsends; i++){ 3688 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3689 bufJ = bufj+sstartsj[i]; 3690 for (j=0; j<nrows; j++) { 3691 row = srow[k++] + b->rowners[rank]; /* global row idx */ 3692 ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3693 for (l=0; l<ncols; l++){ 3694 *bufJ++ = cols[l]; 3695 } 3696 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3697 } 3698 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3699 } 3700 3701 /* recvs and sends of j-array are completed */ 3702 i = nrecvs; 3703 while (i--) { 3704 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3705 } 3706 if (nsends) { 3707 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr); 3708 } 3709 } else if (scall == MAT_REUSE_MATRIX){ 3710 sstartsj = *startsj; 3711 rstartsj = sstartsj + nsends +1; 3712 bufa = *bufa_ptr; 3713 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 3714 b_otha = b_oth->a; 3715 } else { 3716 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "scall must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX"); 3717 } 3718 3719 /* a-array */ 3720 /*---------*/ 3721 /* post receives of a-array */ 3722 for (i=0; i<nrecvs; i++){ 3723 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3724 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3725 } 3726 k = 0; 3727 for (i=0; i<nsends; i++){ 3728 nrows = sstarts[i+1]-sstarts[i]; 3729 bufA = bufa+sstartsj[i]; 3730 for (j=0; j<nrows; j++) { 3731 row = srow[k++] + b->rowners[rank]; /* global row idx */ 3732 ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3733 for (l=0; l<ncols; l++){ 3734 *bufA++ = vals[l]; 3735 } 3736 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3737 3738 } 3739 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3740 } 3741 /* recvs and sends of a-array are completed */ 3742 i = nrecvs; 3743 while (i--) { 3744 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3745 } 3746 if (nsends) { 
3747 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr); 3748 } 3749 3750 if (scall == MAT_INITIAL_MATRIX){ 3751 /* put together the new matrix */ 3752 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 3753 3754 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3755 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3756 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 3757 b_oth->freedata = PETSC_TRUE; 3758 b_oth->nonew = 0; 3759 3760 ierr = PetscFree(bufj);CHKERRQ(ierr); 3761 if (!startsj || !bufa_ptr){ 3762 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 3763 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 3764 } else { 3765 *startsj = sstartsj; 3766 *bufa_ptr = bufa; 3767 } 3768 } 3769 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3770 3771 PetscFunctionReturn(0); 3772 } 3773 3774 /*MC 3775 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 3776 3777 Options Database Keys: 3778 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 3779 3780 Level: beginner 3781 3782 .seealso: MatCreateMPIAIJ 3783 M*/ 3784 3785 EXTERN_C_BEGIN 3786 #undef __FUNCT__ 3787 #define __FUNCT__ "MatCreate_MPIAIJ" 3788 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 3789 { 3790 Mat_MPIAIJ *b; 3791 PetscErrorCode ierr; 3792 PetscInt i; 3793 PetscMPIInt size; 3794 3795 PetscFunctionBegin; 3796 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 3797 3798 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 3799 B->data = (void*)b; 3800 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3801 B->factor = 0; 3802 B->bs = 1; 3803 B->assembled = PETSC_FALSE; 3804 B->mapping = 0; 3805 3806 B->insertmode = NOT_SET_VALUES; 3807 b->size = size; 3808 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 3809 3810 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 3811 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 3812 3813 /* the information in the maps duplicates the information computed below, eventually 3814 we should remove the duplicate information that is not contained in the maps */ 3815 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 3816 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 3817 3818 /* build local table of row and column ownerships */ 3819 ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 3820 ierr = PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 3821 b->cowners = b->rowners + b->size + 2; 3822 ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3823 b->rowners[0] = 0; 3824 for (i=2; i<=b->size; i++) { 3825 b->rowners[i] += b->rowners[i-1]; 3826 } 3827 b->rstart = b->rowners[b->rank]; 3828 b->rend = b->rowners[b->rank+1]; 3829 ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3830 b->cowners[0] = 0; 3831 for (i=2; i<=b->size; i++) { 3832 b->cowners[i] += b->cowners[i-1]; 3833 } 3834 b->cstart = b->cowners[b->rank]; 3835 b->cend = b->cowners[b->rank+1]; 3836 3837 /* build cache for off array entries formed */ 3838 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 3839 b->donotstash = PETSC_FALSE; 3840 b->colmap = 0; 3841 b->garray = 0; 3842 b->roworiented = PETSC_TRUE; 3843 3844 /* stuff used for matrix vector multiply */ 3845 b->lvec = 
PETSC_NULL; 3846 b->Mvctx = PETSC_NULL; 3847 3848 /* stuff for MatGetRow() */ 3849 b->rowindices = 0; 3850 b->rowvalues = 0; 3851 b->getrowactive = PETSC_FALSE; 3852 3853 /* Explicitly create 2 MATSEQAIJ matrices. */ 3854 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 3855 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3856 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3857 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 3858 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3859 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3860 3861 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3862 "MatStoreValues_MPIAIJ", 3863 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 3864 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3865 "MatRetrieveValues_MPIAIJ", 3866 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 3867 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3868 "MatGetDiagonalBlock_MPIAIJ", 3869 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 3870 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3871 "MatIsTranspose_MPIAIJ", 3872 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 3873 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 3874 "MatMPIAIJSetPreallocation_MPIAIJ", 3875 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 3876 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 3877 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 3878 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 3879 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3880 "MatDiagonalScaleLocal_MPIAIJ", 3881 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 3882 PetscFunctionReturn(0); 3883 } 3884 EXTERN_C_END 3885
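/*
   A minimal creation sketch matching the MATMPIAIJ manual page above (illustration
   only; comm, m, n, M, N, the row loop, cols[] and vals[] are placeholders supplied
   by the caller, and the preallocation numbers 5 and 2 are example values):

      Mat A;
      ierr = MatCreateMPIAIJ(comm,m,n,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
      for (each locally owned row i) {
        ierr = MatSetValues(A,1,&i,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      }
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   Alternatively, -mat_type mpiaij together with MatSetFromOptions() selects this
   matrix type at runtime, as noted in the options database key above.
*/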