1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" 4 #include "src/inline/spops.h" 5 6 /* 7 Local utility routine that creates a mapping from the global column 8 number to the local number in the off-diagonal part of the local 9 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 10 a slightly higher hash table cost; without it it is not scalable (each processor 11 has an order N integer array but is fast to acess. 12 */ 13 #undef __FUNCT__ 14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 16 { 17 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 18 PetscErrorCode ierr; 19 PetscInt n = aij->B->n,i; 20 21 PetscFunctionBegin; 22 #if defined (PETSC_USE_CTABLE) 23 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 24 for (i=0; i<n; i++){ 25 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 26 } 27 #else 28 ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 29 ierr = PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));CHKERRQ(ierr); 30 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr); 31 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 32 #endif 33 PetscFunctionReturn(0); 34 } 35 36 #define CHUNKSIZE 15 37 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 38 { \ 39 if (lastcol1 > col) low1 = 0; else high1 = nrow1; \ 40 lastcol1 = col;\ 41 while (high1-low1 > 5) { \ 42 t = (low1+high1)/2; \ 43 if (rp1[t] > col) high1 = t; \ 44 else low1 = t; \ 45 } \ 46 for (_i=low1; _i<high1; _i++) { \ 47 if (rp1[_i] > col) break; \ 48 if (rp1[_i] == col) { \ 49 if (addv == ADD_VALUES) ap1[_i] += value; \ 50 else ap1[_i] = value; \ 51 goto a_noinsert; \ 52 } \ 53 } \ 54 if (value == 0.0 && ignorezeroentries) goto a_noinsert; \ 55 if (nonew == 1) goto a_noinsert; \ 56 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 57 if (nrow1 >= rmax1) { \ 58 /* there is no extra room in row, therefore enlarge */ \ 59 PetscInt new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \ 60 PetscScalar *new_a; \ 61 \ 62 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \ 63 \ 64 /* malloc new storage space */ \ 65 len = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(am+1)*sizeof(PetscInt); \ 66 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 67 new_j = (PetscInt*)(new_a + new_nz); \ 68 new_i = new_j + new_nz; \ 69 \ 70 /* copy over old data into new slots */ \ 71 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \ 72 for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 73 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow1)*sizeof(PetscInt));CHKERRQ(ierr); \ 74 len = (new_nz - CHUNKSIZE - ai[row] - nrow1); \ 75 ierr = PetscMemcpy(new_j+ai[row]+nrow1+CHUNKSIZE,aj+ai[row]+nrow1,len*sizeof(PetscInt));CHKERRQ(ierr); \ 76 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow1)*sizeof(PetscScalar));CHKERRQ(ierr); \ 77 ierr = PetscMemcpy(new_a+ai[row]+nrow1+CHUNKSIZE,aa+ai[row]+nrow1,len*sizeof(PetscScalar));CHKERRQ(ierr); \ 78 /* free up old matrix storage */ \ 79 \ 80 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 81 if (!a->singlemalloc) { \ 82 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 83 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 84 } \ 85 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 86 a->singlemalloc = PETSC_TRUE; \ 87 \ 88 rp1 = aj + ai[row]; ap1 = aa + ai[row]; \ 89 rmax1 = aimax[row] = aimax[row] + CHUNKSIZE; \ 90 ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar)));CHKERRQ(ierr); \ 91 a->maxnz += CHUNKSIZE; \ 92 a->reallocs++; \ 93 } \ 94 N = nrow1++ - 1; a->nz++; \ 95 /* shift up all the later entries in this row */ \ 96 for (ii=N; ii>=_i; ii--) { \ 97 rp1[ii+1] = rp1[ii]; \ 98 ap1[ii+1] = ap1[ii]; \ 99 } \ 100 rp1[_i] = col; \ 101 ap1[_i] = value; \ 102 a_noinsert: ; \ 103 ailen[row] = nrow1; \ 104 } 105 106 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 107 { \ 108 if (lastcol2 > col) low2 = 0; else high2 = nrow2; \ 109 lastcol2 = col;\ 110 while (high2-low2 > 5) { \ 111 t = (low2+high2)/2; \ 112 if (rp2[t] > col) high2 = t; \ 113 else low2 = t; \ 114 } \ 115 for (_i=low2; _i<high2; _i++) { \ 116 if (rp2[_i] > col) break; \ 117 if (rp2[_i] == col) { \ 118 if (addv == ADD_VALUES) ap2[_i] += value; \ 119 else ap2[_i] = value; \ 120 goto b_noinsert; \ 121 } \ 122 } \ 123 if (value == 0.0 && ignorezeroentries) goto b_noinsert; \ 124 if (nonew == 1) goto b_noinsert; \ 125 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 126 if (nrow2 >= rmax2) { \ 127 /* there is no extra room in row, therefore enlarge */ \ 128 PetscInt new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 129 PetscScalar *new_a; \ 130 \ 131 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \ 132 \ 133 /* malloc new storage space */ \ 134 len = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(bm+1)*sizeof(PetscInt); \ 135 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 136 new_j = (PetscInt*)(new_a + new_nz); \ 137 new_i = new_j + new_nz; \ 138 \ 139 /* copy over old data into new slots */ \ 140 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 141 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 142 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow2)*sizeof(PetscInt));CHKERRQ(ierr); \ 143 len = (new_nz - CHUNKSIZE - bi[row] - nrow2); \ 144 ierr = PetscMemcpy(new_j+bi[row]+nrow2+CHUNKSIZE,bj+bi[row]+nrow2,len*sizeof(PetscInt));CHKERRQ(ierr); \ 145 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow2)*sizeof(PetscScalar));CHKERRQ(ierr); \ 146 ierr = PetscMemcpy(new_a+bi[row]+nrow2+CHUNKSIZE,ba+bi[row]+nrow2,len*sizeof(PetscScalar));CHKERRQ(ierr); \ 147 /* free up old matrix storage */ \ 148 \ 149 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 150 if (!b->singlemalloc) { \ 151 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 152 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 153 } \ 154 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 155 b->singlemalloc = PETSC_TRUE; \ 156 \ 157 rp2 = bj + bi[row]; ap2 = ba + bi[row]; \ 158 rmax2 = bimax[row] = bimax[row] + CHUNKSIZE; \ 159 ierr = PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar)));CHKERRQ(ierr); \ 160 b->maxnz += CHUNKSIZE; \ 161 b->reallocs++; \ 162 } \ 163 N = nrow2++ - 1; b->nz++; \ 164 /* shift up all the later entries in this row */ \ 165 for (ii=N; ii>=_i; ii--) { \ 166 rp2[ii+1] = rp2[ii]; \ 167 ap2[ii+1] = ap2[ii]; \ 168 } \ 169 rp2[_i] = col; \ 170 ap2[_i] = value; \ 171 b_noinsert: ; \ 172 bilen[row] = nrow2; \ 173 } 174 175 #undef __FUNCT__ 176 #define __FUNCT__ "MatSetValues_MPIAIJ" 177 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 178 { 179 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 180 PetscScalar value; 181 PetscErrorCode ierr; 182 PetscInt i,j,rstart = aij->rstart,rend = aij->rend; 183 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 184 PetscTruth roworiented = aij->roworiented; 185 186 /* Some Variables required in the macro */ 187 Mat A = aij->A; 188 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 189 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 190 PetscScalar *aa = a->a; 191 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 192 Mat B = aij->B; 193 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 194 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 195 PetscScalar *ba = b->a; 196 197 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 198 PetscInt nonew = a->nonew; 199 PetscScalar *ap1,*ap2; 200 201 PetscFunctionBegin; 202 for (i=0; i<m; i++) { 203 if (im[i] < 0) continue; 204 #if defined(PETSC_USE_DEBUG) 205 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 206 #endif 207 if (im[i] >= rstart && im[i] < rend) { 208 row = im[i] - rstart; 209 lastcol1 = -1; 210 rp1 = aj + ai[row]; 211 ap1 = aa + ai[row]; 212 rmax1 = aimax[row]; 213 nrow1 = ailen[row]; 214 low1 = 0; 215 high1 = nrow1; 216 lastcol2 = -1; 217 rp2 = bj + bi[row]; 218 ap2 = ba + bi[row]; \ 219 rmax2 = bimax[row]; 220 nrow2 = bilen[row]; \ 221 low2 = 0; 222 high2 = nrow2; 223 224 for (j=0; j<n; j++) { 225 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 226 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 227 if (in[j] >= cstart && in[j] < cend){ 228 col = in[j] - cstart; 229 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 230 } else if (in[j] < 0) continue; 231 #if defined(PETSC_USE_DEBUG) 232 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);} 233 #endif 234 else { 235 if (mat->was_assembled) { 236 if (!aij->colmap) { 237 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 238 } 239 #if defined (PETSC_USE_CTABLE) 240 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 241 col--; 242 #else 243 col = aij->colmap[in[j]] - 1; 244 #endif 245 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 246 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 247 col = in[j]; 248 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 249 B = aij->B; 250 b = (Mat_SeqAIJ*)B->data; 251 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 252 ba = b->a; 253 } 254 } else col = in[j]; 255 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 256 } 257 } 258 } else { 259 if (!aij->donotstash) { 260 if (roworiented) { 261 if (ignorezeroentries && v[i*n] == 0.0) continue; 262 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 263 } else { 264 if (ignorezeroentries && v[i] == 0.0) continue; 265 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 266 } 267 } 268 } 269 } 270 PetscFunctionReturn(0); 271 } 272 273 #undef __FUNCT__ 274 #define __FUNCT__ "MatGetValues_MPIAIJ" 275 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 276 { 277 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 278 PetscErrorCode ierr; 279 PetscInt i,j,rstart = aij->rstart,rend = aij->rend; 280 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 281 282 PetscFunctionBegin; 283 for (i=0; i<m; i++) { 284 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 285 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1); 286 if (idxm[i] >= rstart && idxm[i] < rend) { 287 row = idxm[i] - rstart; 288 for (j=0; j<n; j++) { 289 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 290 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1); 291 if (idxn[j] >= cstart && idxn[j] < cend){ 292 col = idxn[j] - cstart; 293 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 294 } else { 295 if (!aij->colmap) { 296 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 297 } 298 #if defined (PETSC_USE_CTABLE) 299 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 300 col --; 301 #else 302 col = aij->colmap[idxn[j]] - 1; 303 #endif 304 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 305 else { 306 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 307 } 308 } 309 } 310 } else { 311 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 312 } 313 } 314 PetscFunctionReturn(0); 315 } 316 317 #undef __FUNCT__ 318 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 319 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 320 { 321 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 322 PetscErrorCode ierr; 323 PetscInt nstash,reallocs; 324 InsertMode addv; 325 326 PetscFunctionBegin; 327 if (aij->donotstash) { 328 PetscFunctionReturn(0); 329 } 330 331 /* make sure all processors are either in INSERTMODE or ADDMODE */ 332 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 333 if (addv == (ADD_VALUES|INSERT_VALUES)) { 334 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 335 } 336 mat->insertmode = addv; /* in case this processor had no cache */ 337 338 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 339 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 340 ierr = PetscLogInfo((aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr); 341 PetscFunctionReturn(0); 342 } 343 344 #undef __FUNCT__ 345 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 346 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 347 { 348 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 349 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 350 PetscErrorCode ierr; 351 PetscMPIInt n; 352 PetscInt i,j,rstart,ncols,flg; 353 PetscInt *row,*col,other_disassembled; 354 PetscScalar *val; 355 InsertMode addv = mat->insertmode; 356 357 PetscFunctionBegin; 358 if (!aij->donotstash) { 359 while (1) { 360 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 361 if (!flg) break; 362 363 for (i=0; i<n;) { 364 /* Now identify the consecutive vals belonging to the same row */ 365 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 366 if (j < n) ncols = j-i; 367 else ncols = n-i; 368 /* Now assemble all these values with a single function call */ 369 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 370 i = j; 371 } 372 } 373 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 374 } 375 a->compressedrow.use = PETSC_FALSE; 376 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 377 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 378 379 /* determine if any processor has disassembled, if so we must 380 also disassemble ourselfs, in order that we may reassemble. */ 381 /* 382 if nonzero structure of submatrix B cannot change then we know that 383 no processor disassembled thus we can skip this stuff 384 */ 385 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 386 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 387 if (mat->was_assembled && !other_disassembled) { 388 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 389 /* reaccess the b because aij->B was changed */ 390 b = (Mat_SeqAIJ *)aij->B->data; 391 } 392 } 393 394 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 395 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 396 } 397 ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr); 398 b->compressedrow.use = PETSC_TRUE; 399 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 400 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 401 402 if (aij->rowvalues) { 403 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 404 aij->rowvalues = 0; 405 } 406 407 /* used by MatAXPY() */ 408 a->xtoy = 0; b->xtoy = 0; 409 a->XtoY = 0; b->XtoY = 0; 410 411 PetscFunctionReturn(0); 412 } 413 414 #undef __FUNCT__ 415 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 416 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 417 { 418 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 419 PetscErrorCode ierr; 420 421 PetscFunctionBegin; 422 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 423 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 424 PetscFunctionReturn(0); 425 } 426 427 #undef __FUNCT__ 428 #define __FUNCT__ "MatZeroRows_MPIAIJ" 429 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag) 430 { 431 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 432 PetscErrorCode ierr; 433 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1; 434 PetscInt i,N,*rows,*owners = l->rowners; 435 PetscInt *nprocs,j,idx,nsends,row; 436 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 437 PetscInt *rvalues,count,base,slen,*source; 438 PetscInt *lens,*lrows,*values,rstart=l->rstart; 439 MPI_Comm comm = A->comm; 440 MPI_Request *send_waits,*recv_waits; 441 MPI_Status recv_status,*send_status; 442 IS istmp; 443 #if defined(PETSC_DEBUG) 444 PetscTruth found = PETSC_FALSE; 445 #endif 446 447 PetscFunctionBegin; 448 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 449 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 450 451 /* first count number of contributors to each processor */ 452 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 453 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 454 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 455 j = 0; 456 for (i=0; i<N; i++) { 457 if (lastidx > (idx = rows[i])) j = 0; 458 lastidx = idx; 459 for (; j<size; j++) { 460 if (idx >= owners[j] && idx < owners[j+1]) { 461 nprocs[2*j]++; 462 nprocs[2*j+1] = 1; 463 owner[i] = j; 464 #if defined(PETSC_DEBUG) 465 found = PETSC_TRUE; 466 #endif 467 break; 468 } 469 } 470 #if defined(PETSC_DEBUG) 471 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 472 found = PETSC_FALSE; 473 #endif 474 } 475 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 476 477 /* inform other processors of number of messages and max length*/ 478 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 479 480 /* post receives: */ 481 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 482 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 483 for (i=0; i<nrecvs; i++) { 484 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 485 } 486 487 /* do sends: 488 1) starts[i] gives the starting index in svalues for stuff going to 489 the ith processor 490 */ 491 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 492 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 493 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 494 starts[0] = 0; 495 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 496 for (i=0; i<N; i++) { 497 svalues[starts[owner[i]]++] = rows[i]; 498 } 499 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 500 501 starts[0] = 0; 502 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 503 count = 0; 504 for (i=0; i<size; i++) { 505 if (nprocs[2*i+1]) { 506 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 507 } 508 } 509 ierr = PetscFree(starts);CHKERRQ(ierr); 510 511 base = owners[rank]; 512 513 /* wait on receives */ 514 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 515 source = lens + nrecvs; 516 count = nrecvs; slen = 0; 517 while (count) { 518 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 519 /* unpack receives into our local space */ 520 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 521 source[imdex] = recv_status.MPI_SOURCE; 522 lens[imdex] = n; 523 slen += n; 524 count--; 525 } 526 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 527 528 /* move the data into the send scatter */ 529 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 530 count = 0; 531 for (i=0; i<nrecvs; i++) { 532 values = rvalues + i*nmax; 533 for (j=0; j<lens[i]; j++) { 534 lrows[count++] = values[j] - base; 535 } 536 } 537 ierr = PetscFree(rvalues);CHKERRQ(ierr); 538 ierr = PetscFree(lens);CHKERRQ(ierr); 539 ierr = PetscFree(owner);CHKERRQ(ierr); 540 ierr = PetscFree(nprocs);CHKERRQ(ierr); 541 542 /* actually zap the local rows */ 543 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 544 ierr = PetscLogObjectParent(A,istmp);CHKERRQ(ierr); 545 546 /* 547 Zero the required rows. If the "diagonal block" of the matrix 548 is square and the user wishes to set the diagonal we use seperate 549 code so that MatSetValues() is not called for each diagonal allocating 550 new memory, thus calling lots of mallocs and slowing things down. 551 552 Contributed by: Mathew Knepley 553 */ 554 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 555 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 556 if (diag && (l->A->M == l->A->N)) { 557 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 558 } else if (diag) { 559 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 560 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 561 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 562 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 563 } 564 for (i = 0; i < slen; i++) { 565 row = lrows[i] + rstart; 566 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 567 } 568 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 569 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 570 } else { 571 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 572 } 573 ierr = ISDestroy(istmp);CHKERRQ(ierr); 574 ierr = PetscFree(lrows);CHKERRQ(ierr); 575 576 /* wait on sends */ 577 if (nsends) { 578 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 579 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 580 ierr = PetscFree(send_status);CHKERRQ(ierr); 581 } 582 ierr = PetscFree(send_waits);CHKERRQ(ierr); 583 ierr = PetscFree(svalues);CHKERRQ(ierr); 584 585 PetscFunctionReturn(0); 586 } 587 588 #undef __FUNCT__ 589 #define __FUNCT__ "MatMult_MPIAIJ" 590 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 591 { 592 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 593 PetscErrorCode ierr; 594 PetscInt nt; 595 596 PetscFunctionBegin; 597 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 598 if (nt != A->n) { 599 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt); 600 } 601 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 602 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 603 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 604 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 605 PetscFunctionReturn(0); 606 } 607 608 #undef __FUNCT__ 609 #define __FUNCT__ "MatMultAdd_MPIAIJ" 610 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 611 { 612 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 613 PetscErrorCode ierr; 614 615 PetscFunctionBegin; 616 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 617 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 618 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 619 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 620 PetscFunctionReturn(0); 621 } 622 623 #undef __FUNCT__ 624 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 625 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 626 { 627 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 628 PetscErrorCode ierr; 629 PetscTruth merged; 630 631 PetscFunctionBegin; 632 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 633 /* do nondiagonal part */ 634 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 635 if (!merged) { 636 /* send it on its way */ 637 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 638 /* do local part */ 639 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 640 /* receive remote parts: note this assumes the values are not actually */ 641 /* added in yy until the next line, */ 642 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 643 } else { 644 /* do local part */ 645 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 646 /* send it on its way */ 647 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 648 /* values actually were received in the Begin() but we need to call this nop */ 649 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 650 } 651 PetscFunctionReturn(0); 652 } 653 654 EXTERN_C_BEGIN 655 #undef __FUNCT__ 656 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 657 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth tol,PetscTruth *f) 658 { 659 MPI_Comm comm; 660 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 661 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 662 IS Me,Notme; 663 PetscErrorCode ierr; 664 PetscInt M,N,first,last,*notme,i; 665 PetscMPIInt size; 666 667 PetscFunctionBegin; 668 669 /* Easy test: symmetric diagonal block */ 670 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 671 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 672 if (!*f) PetscFunctionReturn(0); 673 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 674 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 675 if (size == 1) PetscFunctionReturn(0); 676 677 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 678 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 679 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 680 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 681 for (i=0; i<first; i++) notme[i] = i; 682 for (i=last; i<M; i++) notme[i-last+first] = i; 683 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 684 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 685 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 686 Aoff = Aoffs[0]; 687 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 688 Boff = Boffs[0]; 689 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 690 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 691 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 692 ierr = ISDestroy(Me);CHKERRQ(ierr); 693 ierr = ISDestroy(Notme);CHKERRQ(ierr); 694 695 PetscFunctionReturn(0); 696 } 697 EXTERN_C_END 698 699 #undef __FUNCT__ 700 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 701 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 702 { 703 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 704 PetscErrorCode ierr; 705 706 PetscFunctionBegin; 707 /* do nondiagonal part */ 708 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 709 /* send it on its way */ 710 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 711 /* do local part */ 712 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 713 /* receive remote parts */ 714 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 715 PetscFunctionReturn(0); 716 } 717 718 /* 719 This only works correctly for square matrices where the subblock A->A is the 720 diagonal block 721 */ 722 #undef __FUNCT__ 723 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 724 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 725 { 726 PetscErrorCode ierr; 727 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 728 729 PetscFunctionBegin; 730 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 731 if (a->rstart != a->cstart || a->rend != a->cend) { 732 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 733 } 734 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 735 PetscFunctionReturn(0); 736 } 737 738 #undef __FUNCT__ 739 #define __FUNCT__ "MatScale_MPIAIJ" 740 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A) 741 { 742 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 743 PetscErrorCode ierr; 744 745 PetscFunctionBegin; 746 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 747 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 748 PetscFunctionReturn(0); 749 } 750 751 #undef __FUNCT__ 752 #define __FUNCT__ "MatDestroy_MPIAIJ" 753 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 754 { 755 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 756 PetscErrorCode ierr; 757 758 PetscFunctionBegin; 759 #if defined(PETSC_USE_LOG) 760 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N); 761 #endif 762 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 763 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 764 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 765 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 766 #if defined (PETSC_USE_CTABLE) 767 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 768 #else 769 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 770 #endif 771 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 772 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 773 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 774 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 775 ierr = PetscFree(aij);CHKERRQ(ierr); 776 777 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 778 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 779 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 780 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 781 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 782 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 783 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 784 PetscFunctionReturn(0); 785 } 786 787 #undef __FUNCT__ 788 #define __FUNCT__ "MatView_MPIAIJ_Binary" 789 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 790 { 791 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 792 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 793 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 794 PetscErrorCode ierr; 795 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 796 int fd; 797 PetscInt nz,header[4],*row_lengths,*range,rlen,i; 798 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 799 PetscScalar *column_values; 800 801 PetscFunctionBegin; 802 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 803 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 804 nz = A->nz + B->nz; 805 if (!rank) { 806 header[0] = MAT_FILE_COOKIE; 807 header[1] = mat->M; 808 header[2] = mat->N; 809 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 810 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 811 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 812 /* get largest number of rows any processor has */ 813 rlen = mat->m; 814 ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr); 815 for (i=1; i<size; i++) { 816 rlen = PetscMax(rlen,range[i+1] - range[i]); 817 } 818 } else { 819 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 820 rlen = mat->m; 821 } 822 823 /* load up the local row counts */ 824 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 825 for (i=0; i<mat->m; i++) { 826 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 827 } 828 829 /* store the row lengths to the file */ 830 if (!rank) { 831 MPI_Status status; 832 ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 833 for (i=1; i<size; i++) { 834 rlen = range[i+1] - range[i]; 835 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 836 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 837 } 838 } else { 839 ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 840 } 841 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 842 843 /* load up the local column indices */ 844 nzmax = nz; /* )th processor needs space a largest processor needs */ 845 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 846 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 847 cnt = 0; 848 for (i=0; i<mat->m; i++) { 849 for (j=B->i[i]; j<B->i[i+1]; j++) { 850 if ( (col = garray[B->j[j]]) > cstart) break; 851 column_indices[cnt++] = col; 852 } 853 for (k=A->i[i]; k<A->i[i+1]; k++) { 854 column_indices[cnt++] = A->j[k] + cstart; 855 } 856 for (; j<B->i[i+1]; j++) { 857 column_indices[cnt++] = garray[B->j[j]]; 858 } 859 } 860 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 861 862 /* store the column indices to the file */ 863 if (!rank) { 864 MPI_Status status; 865 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 866 for (i=1; i<size; i++) { 867 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 868 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 869 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 870 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 871 } 872 } else { 873 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 874 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 875 } 876 ierr = PetscFree(column_indices);CHKERRQ(ierr); 877 878 /* load up the local column values */ 879 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 880 cnt = 0; 881 for (i=0; i<mat->m; i++) { 882 for (j=B->i[i]; j<B->i[i+1]; j++) { 883 if ( garray[B->j[j]] > cstart) break; 884 column_values[cnt++] = B->a[j]; 885 } 886 for (k=A->i[i]; k<A->i[i+1]; k++) { 887 column_values[cnt++] = A->a[k]; 888 } 889 for (; j<B->i[i+1]; j++) { 890 column_values[cnt++] = B->a[j]; 891 } 892 } 893 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 894 895 /* store the column values to the file */ 896 if (!rank) { 897 MPI_Status status; 898 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 899 for (i=1; i<size; i++) { 900 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 901 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 902 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 903 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 904 } 905 } else { 906 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 907 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 908 } 909 ierr = PetscFree(column_values);CHKERRQ(ierr); 910 PetscFunctionReturn(0); 911 } 912 913 #undef __FUNCT__ 914 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 915 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 916 { 917 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 918 PetscErrorCode ierr; 919 PetscMPIInt rank = aij->rank,size = aij->size; 920 PetscTruth isdraw,iascii,isbinary; 921 PetscViewer sviewer; 922 PetscViewerFormat format; 923 924 PetscFunctionBegin; 925 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 926 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 927 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 928 if (iascii) { 929 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 930 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 931 MatInfo info; 932 PetscTruth inodes; 933 934 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 935 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 936 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 937 if (!inodes) { 938 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 939 rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 940 } else { 941 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 942 rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 943 } 944 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 945 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 946 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 947 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 948 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 949 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 950 PetscFunctionReturn(0); 951 } else if (format == PETSC_VIEWER_ASCII_INFO) { 952 PetscInt inodecount,inodelimit,*inodes; 953 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 954 if (inodes) { 955 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 956 } else { 957 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 958 } 959 PetscFunctionReturn(0); 960 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 961 PetscFunctionReturn(0); 962 } 963 } else if (isbinary) { 964 if (size == 1) { 965 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 966 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 967 } else { 968 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 969 } 970 PetscFunctionReturn(0); 971 } else if (isdraw) { 972 PetscDraw draw; 973 PetscTruth isnull; 974 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 975 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 976 } 977 978 if (size == 1) { 979 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 980 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 981 } else { 982 /* assemble the entire matrix onto first processor. */ 983 Mat A; 984 Mat_SeqAIJ *Aloc; 985 PetscInt M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 986 PetscScalar *a; 987 988 if (!rank) { 989 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 990 } else { 991 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 992 } 993 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 994 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 995 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 996 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 997 998 /* copy over the A part */ 999 Aloc = (Mat_SeqAIJ*)aij->A->data; 1000 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1001 row = aij->rstart; 1002 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 1003 for (i=0; i<m; i++) { 1004 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1005 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1006 } 1007 aj = Aloc->j; 1008 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 1009 1010 /* copy over the B part */ 1011 Aloc = (Mat_SeqAIJ*)aij->B->data; 1012 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1013 row = aij->rstart; 1014 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1015 ct = cols; 1016 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1017 for (i=0; i<m; i++) { 1018 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1019 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1020 } 1021 ierr = PetscFree(ct);CHKERRQ(ierr); 1022 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1023 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1024 /* 1025 Everyone has to call to draw the matrix since the graphics waits are 1026 synchronized across all processors that share the PetscDraw object 1027 */ 1028 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1029 if (!rank) { 1030 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 1031 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1032 } 1033 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1034 ierr = MatDestroy(A);CHKERRQ(ierr); 1035 } 1036 PetscFunctionReturn(0); 1037 } 1038 1039 #undef __FUNCT__ 1040 #define __FUNCT__ "MatView_MPIAIJ" 1041 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1042 { 1043 PetscErrorCode ierr; 1044 PetscTruth iascii,isdraw,issocket,isbinary; 1045 1046 PetscFunctionBegin; 1047 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1048 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1049 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1050 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1051 if (iascii || isdraw || isbinary || issocket) { 1052 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1053 } else { 1054 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1055 } 1056 PetscFunctionReturn(0); 1057 } 1058 1059 1060 1061 #undef __FUNCT__ 1062 #define __FUNCT__ "MatRelax_MPIAIJ" 1063 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1064 { 1065 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1066 PetscErrorCode ierr; 1067 Vec bb1; 1068 PetscScalar mone=-1.0; 1069 1070 PetscFunctionBegin; 1071 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1072 1073 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1074 1075 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1076 if (flag & SOR_ZERO_INITIAL_GUESS) { 1077 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1078 its--; 1079 } 1080 1081 while (its--) { 1082 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1083 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1084 1085 /* update rhs: bb1 = bb - B*x */ 1086 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1087 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1088 1089 /* local sweep */ 1090 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1091 CHKERRQ(ierr); 1092 } 1093 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1094 if (flag & SOR_ZERO_INITIAL_GUESS) { 1095 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1096 its--; 1097 } 1098 while (its--) { 1099 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1100 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1101 1102 /* update rhs: bb1 = bb - B*x */ 1103 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1104 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1105 1106 /* local sweep */ 1107 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1108 CHKERRQ(ierr); 1109 } 1110 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1111 if (flag & SOR_ZERO_INITIAL_GUESS) { 1112 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1113 its--; 1114 } 1115 while (its--) { 1116 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1117 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1118 1119 /* update rhs: bb1 = bb - B*x */ 1120 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1121 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1122 1123 /* local sweep */ 1124 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1125 CHKERRQ(ierr); 1126 } 1127 } else { 1128 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1129 } 1130 1131 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1132 PetscFunctionReturn(0); 1133 } 1134 1135 #undef __FUNCT__ 1136 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1137 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1138 { 1139 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1140 Mat A = mat->A,B = mat->B; 1141 PetscErrorCode ierr; 1142 PetscReal isend[5],irecv[5]; 1143 1144 PetscFunctionBegin; 1145 info->block_size = 1.0; 1146 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1147 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1148 isend[3] = info->memory; isend[4] = info->mallocs; 1149 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1150 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1151 isend[3] += info->memory; isend[4] += info->mallocs; 1152 if (flag == MAT_LOCAL) { 1153 info->nz_used = isend[0]; 1154 info->nz_allocated = isend[1]; 1155 info->nz_unneeded = isend[2]; 1156 info->memory = isend[3]; 1157 info->mallocs = isend[4]; 1158 } else if (flag == MAT_GLOBAL_MAX) { 1159 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1160 info->nz_used = irecv[0]; 1161 info->nz_allocated = irecv[1]; 1162 info->nz_unneeded = irecv[2]; 1163 info->memory = irecv[3]; 1164 info->mallocs = irecv[4]; 1165 } else if (flag == MAT_GLOBAL_SUM) { 1166 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1167 info->nz_used = irecv[0]; 1168 info->nz_allocated = irecv[1]; 1169 info->nz_unneeded = irecv[2]; 1170 info->memory = irecv[3]; 1171 info->mallocs = irecv[4]; 1172 } 1173 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1174 info->fill_ratio_needed = 0; 1175 info->factor_mallocs = 0; 1176 info->rows_global = (double)matin->M; 1177 info->columns_global = (double)matin->N; 1178 info->rows_local = (double)matin->m; 1179 info->columns_local = (double)matin->N; 1180 1181 PetscFunctionReturn(0); 1182 } 1183 1184 #undef __FUNCT__ 1185 #define __FUNCT__ "MatSetOption_MPIAIJ" 1186 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op) 1187 { 1188 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1189 PetscErrorCode ierr; 1190 1191 PetscFunctionBegin; 1192 switch (op) { 1193 case MAT_NO_NEW_NONZERO_LOCATIONS: 1194 case MAT_YES_NEW_NONZERO_LOCATIONS: 1195 case MAT_COLUMNS_UNSORTED: 1196 case MAT_COLUMNS_SORTED: 1197 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1198 case MAT_KEEP_ZEROED_ROWS: 1199 case MAT_NEW_NONZERO_LOCATION_ERR: 1200 case MAT_USE_INODES: 1201 case MAT_DO_NOT_USE_INODES: 1202 case MAT_IGNORE_ZERO_ENTRIES: 1203 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1204 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1205 break; 1206 case MAT_ROW_ORIENTED: 1207 a->roworiented = PETSC_TRUE; 1208 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1209 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1210 break; 1211 case MAT_ROWS_SORTED: 1212 case MAT_ROWS_UNSORTED: 1213 case MAT_YES_NEW_DIAGONALS: 1214 ierr = PetscLogInfo((A,"MatSetOption_MPIAIJ:Option ignored\n"));CHKERRQ(ierr); 1215 break; 1216 case MAT_COLUMN_ORIENTED: 1217 a->roworiented = PETSC_FALSE; 1218 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1219 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1220 break; 1221 case MAT_IGNORE_OFF_PROC_ENTRIES: 1222 a->donotstash = PETSC_TRUE; 1223 break; 1224 case MAT_NO_NEW_DIAGONALS: 1225 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1226 case MAT_SYMMETRIC: 1227 case MAT_STRUCTURALLY_SYMMETRIC: 1228 case MAT_HERMITIAN: 1229 case MAT_SYMMETRY_ETERNAL: 1230 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1231 break; 1232 case MAT_NOT_SYMMETRIC: 1233 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1234 case MAT_NOT_HERMITIAN: 1235 case MAT_NOT_SYMMETRY_ETERNAL: 1236 break; 1237 default: 1238 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1239 } 1240 PetscFunctionReturn(0); 1241 } 1242 1243 #undef __FUNCT__ 1244 #define __FUNCT__ "MatGetRow_MPIAIJ" 1245 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1246 { 1247 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1248 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1249 PetscErrorCode ierr; 1250 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1251 PetscInt nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1252 PetscInt *cmap,*idx_p; 1253 1254 PetscFunctionBegin; 1255 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1256 mat->getrowactive = PETSC_TRUE; 1257 1258 if (!mat->rowvalues && (idx || v)) { 1259 /* 1260 allocate enough space to hold information from the longest row. 1261 */ 1262 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1263 PetscInt max = 1,tmp; 1264 for (i=0; i<matin->m; i++) { 1265 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1266 if (max < tmp) { max = tmp; } 1267 } 1268 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1269 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1270 } 1271 1272 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1273 lrow = row - rstart; 1274 1275 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1276 if (!v) {pvA = 0; pvB = 0;} 1277 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1278 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1279 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1280 nztot = nzA + nzB; 1281 1282 cmap = mat->garray; 1283 if (v || idx) { 1284 if (nztot) { 1285 /* Sort by increasing column numbers, assuming A and B already sorted */ 1286 PetscInt imark = -1; 1287 if (v) { 1288 *v = v_p = mat->rowvalues; 1289 for (i=0; i<nzB; i++) { 1290 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1291 else break; 1292 } 1293 imark = i; 1294 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1295 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1296 } 1297 if (idx) { 1298 *idx = idx_p = mat->rowindices; 1299 if (imark > -1) { 1300 for (i=0; i<imark; i++) { 1301 idx_p[i] = cmap[cworkB[i]]; 1302 } 1303 } else { 1304 for (i=0; i<nzB; i++) { 1305 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1306 else break; 1307 } 1308 imark = i; 1309 } 1310 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1311 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1312 } 1313 } else { 1314 if (idx) *idx = 0; 1315 if (v) *v = 0; 1316 } 1317 } 1318 *nz = nztot; 1319 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1320 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1321 PetscFunctionReturn(0); 1322 } 1323 1324 #undef __FUNCT__ 1325 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1326 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1327 { 1328 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1329 1330 PetscFunctionBegin; 1331 if (!aij->getrowactive) { 1332 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1333 } 1334 aij->getrowactive = PETSC_FALSE; 1335 PetscFunctionReturn(0); 1336 } 1337 1338 #undef __FUNCT__ 1339 #define __FUNCT__ "MatNorm_MPIAIJ" 1340 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1341 { 1342 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1343 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1344 PetscErrorCode ierr; 1345 PetscInt i,j,cstart = aij->cstart; 1346 PetscReal sum = 0.0; 1347 PetscScalar *v; 1348 1349 PetscFunctionBegin; 1350 if (aij->size == 1) { 1351 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1352 } else { 1353 if (type == NORM_FROBENIUS) { 1354 v = amat->a; 1355 for (i=0; i<amat->nz; i++) { 1356 #if defined(PETSC_USE_COMPLEX) 1357 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1358 #else 1359 sum += (*v)*(*v); v++; 1360 #endif 1361 } 1362 v = bmat->a; 1363 for (i=0; i<bmat->nz; i++) { 1364 #if defined(PETSC_USE_COMPLEX) 1365 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1366 #else 1367 sum += (*v)*(*v); v++; 1368 #endif 1369 } 1370 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1371 *norm = sqrt(*norm); 1372 } else if (type == NORM_1) { /* max column norm */ 1373 PetscReal *tmp,*tmp2; 1374 PetscInt *jj,*garray = aij->garray; 1375 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1376 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1377 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1378 *norm = 0.0; 1379 v = amat->a; jj = amat->j; 1380 for (j=0; j<amat->nz; j++) { 1381 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1382 } 1383 v = bmat->a; jj = bmat->j; 1384 for (j=0; j<bmat->nz; j++) { 1385 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1386 } 1387 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1388 for (j=0; j<mat->N; j++) { 1389 if (tmp2[j] > *norm) *norm = tmp2[j]; 1390 } 1391 ierr = PetscFree(tmp);CHKERRQ(ierr); 1392 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1393 } else if (type == NORM_INFINITY) { /* max row norm */ 1394 PetscReal ntemp = 0.0; 1395 for (j=0; j<aij->A->m; j++) { 1396 v = amat->a + amat->i[j]; 1397 sum = 0.0; 1398 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1399 sum += PetscAbsScalar(*v); v++; 1400 } 1401 v = bmat->a + bmat->i[j]; 1402 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1403 sum += PetscAbsScalar(*v); v++; 1404 } 1405 if (sum > ntemp) ntemp = sum; 1406 } 1407 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1408 } else { 1409 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1410 } 1411 } 1412 PetscFunctionReturn(0); 1413 } 1414 1415 #undef __FUNCT__ 1416 #define __FUNCT__ "MatTranspose_MPIAIJ" 1417 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1418 { 1419 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1420 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1421 PetscErrorCode ierr; 1422 PetscInt M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1423 Mat B; 1424 PetscScalar *array; 1425 1426 PetscFunctionBegin; 1427 if (!matout && M != N) { 1428 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1429 } 1430 1431 ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr); 1432 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1433 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1434 1435 /* copy over the A part */ 1436 Aloc = (Mat_SeqAIJ*)a->A->data; 1437 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1438 row = a->rstart; 1439 for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;} 1440 for (i=0; i<m; i++) { 1441 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1442 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1443 } 1444 aj = Aloc->j; 1445 for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;} 1446 1447 /* copy over the B part */ 1448 Aloc = (Mat_SeqAIJ*)a->B->data; 1449 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1450 row = a->rstart; 1451 ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1452 ct = cols; 1453 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1454 for (i=0; i<m; i++) { 1455 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1456 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1457 } 1458 ierr = PetscFree(ct);CHKERRQ(ierr); 1459 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1460 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1461 if (matout) { 1462 *matout = B; 1463 } else { 1464 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1465 } 1466 PetscFunctionReturn(0); 1467 } 1468 1469 #undef __FUNCT__ 1470 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1471 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1472 { 1473 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1474 Mat a = aij->A,b = aij->B; 1475 PetscErrorCode ierr; 1476 PetscInt s1,s2,s3; 1477 1478 PetscFunctionBegin; 1479 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1480 if (rr) { 1481 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1482 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1483 /* Overlap communication with computation. */ 1484 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1485 } 1486 if (ll) { 1487 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1488 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1489 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1490 } 1491 /* scale the diagonal block */ 1492 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1493 1494 if (rr) { 1495 /* Do a scatter end and then right scale the off-diagonal block */ 1496 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1497 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1498 } 1499 1500 PetscFunctionReturn(0); 1501 } 1502 1503 1504 #undef __FUNCT__ 1505 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1506 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A) 1507 { 1508 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1509 PetscErrorCode ierr; 1510 1511 PetscFunctionBegin; 1512 if (!a->rank) { 1513 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1514 } 1515 PetscFunctionReturn(0); 1516 } 1517 1518 #undef __FUNCT__ 1519 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1520 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1521 { 1522 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1523 PetscErrorCode ierr; 1524 1525 PetscFunctionBegin; 1526 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1527 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1528 PetscFunctionReturn(0); 1529 } 1530 #undef __FUNCT__ 1531 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1532 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1533 { 1534 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1535 PetscErrorCode ierr; 1536 1537 PetscFunctionBegin; 1538 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1539 PetscFunctionReturn(0); 1540 } 1541 1542 #undef __FUNCT__ 1543 #define __FUNCT__ "MatEqual_MPIAIJ" 1544 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1545 { 1546 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1547 Mat a,b,c,d; 1548 PetscTruth flg; 1549 PetscErrorCode ierr; 1550 1551 PetscFunctionBegin; 1552 a = matA->A; b = matA->B; 1553 c = matB->A; d = matB->B; 1554 1555 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1556 if (flg) { 1557 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1558 } 1559 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1560 PetscFunctionReturn(0); 1561 } 1562 1563 #undef __FUNCT__ 1564 #define __FUNCT__ "MatCopy_MPIAIJ" 1565 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1566 { 1567 PetscErrorCode ierr; 1568 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1569 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1570 1571 PetscFunctionBegin; 1572 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1573 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1574 /* because of the column compression in the off-processor part of the matrix a->B, 1575 the number of columns in a->B and b->B may be different, hence we cannot call 1576 the MatCopy() directly on the two parts. If need be, we can provide a more 1577 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1578 then copying the submatrices */ 1579 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1580 } else { 1581 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1582 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1583 } 1584 PetscFunctionReturn(0); 1585 } 1586 1587 #undef __FUNCT__ 1588 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1589 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1590 { 1591 PetscErrorCode ierr; 1592 1593 PetscFunctionBegin; 1594 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1595 PetscFunctionReturn(0); 1596 } 1597 1598 #include "petscblaslapack.h" 1599 #undef __FUNCT__ 1600 #define __FUNCT__ "MatAXPY_MPIAIJ" 1601 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str) 1602 { 1603 PetscErrorCode ierr; 1604 PetscInt i; 1605 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1606 PetscBLASInt bnz,one=1; 1607 Mat_SeqAIJ *x,*y; 1608 1609 PetscFunctionBegin; 1610 if (str == SAME_NONZERO_PATTERN) { 1611 x = (Mat_SeqAIJ *)xx->A->data; 1612 y = (Mat_SeqAIJ *)yy->A->data; 1613 bnz = (PetscBLASInt)x->nz; 1614 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1615 x = (Mat_SeqAIJ *)xx->B->data; 1616 y = (Mat_SeqAIJ *)yy->B->data; 1617 bnz = (PetscBLASInt)x->nz; 1618 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1619 } else if (str == SUBSET_NONZERO_PATTERN) { 1620 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1621 1622 x = (Mat_SeqAIJ *)xx->B->data; 1623 y = (Mat_SeqAIJ *)yy->B->data; 1624 if (y->xtoy && y->XtoY != xx->B) { 1625 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1626 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1627 } 1628 if (!y->xtoy) { /* get xtoy */ 1629 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1630 y->XtoY = xx->B; 1631 } 1632 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1633 } else { 1634 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1635 } 1636 PetscFunctionReturn(0); 1637 } 1638 1639 /* -------------------------------------------------------------------*/ 1640 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1641 MatGetRow_MPIAIJ, 1642 MatRestoreRow_MPIAIJ, 1643 MatMult_MPIAIJ, 1644 /* 4*/ MatMultAdd_MPIAIJ, 1645 MatMultTranspose_MPIAIJ, 1646 MatMultTransposeAdd_MPIAIJ, 1647 0, 1648 0, 1649 0, 1650 /*10*/ 0, 1651 0, 1652 0, 1653 MatRelax_MPIAIJ, 1654 MatTranspose_MPIAIJ, 1655 /*15*/ MatGetInfo_MPIAIJ, 1656 MatEqual_MPIAIJ, 1657 MatGetDiagonal_MPIAIJ, 1658 MatDiagonalScale_MPIAIJ, 1659 MatNorm_MPIAIJ, 1660 /*20*/ MatAssemblyBegin_MPIAIJ, 1661 MatAssemblyEnd_MPIAIJ, 1662 0, 1663 MatSetOption_MPIAIJ, 1664 MatZeroEntries_MPIAIJ, 1665 /*25*/ MatZeroRows_MPIAIJ, 1666 0, 1667 0, 1668 0, 1669 0, 1670 /*30*/ MatSetUpPreallocation_MPIAIJ, 1671 0, 1672 0, 1673 0, 1674 0, 1675 /*35*/ MatDuplicate_MPIAIJ, 1676 0, 1677 0, 1678 0, 1679 0, 1680 /*40*/ MatAXPY_MPIAIJ, 1681 MatGetSubMatrices_MPIAIJ, 1682 MatIncreaseOverlap_MPIAIJ, 1683 MatGetValues_MPIAIJ, 1684 MatCopy_MPIAIJ, 1685 /*45*/ MatPrintHelp_MPIAIJ, 1686 MatScale_MPIAIJ, 1687 0, 1688 0, 1689 0, 1690 /*50*/ MatSetBlockSize_MPIAIJ, 1691 0, 1692 0, 1693 0, 1694 0, 1695 /*55*/ MatFDColoringCreate_MPIAIJ, 1696 0, 1697 MatSetUnfactored_MPIAIJ, 1698 0, 1699 0, 1700 /*60*/ MatGetSubMatrix_MPIAIJ, 1701 MatDestroy_MPIAIJ, 1702 MatView_MPIAIJ, 1703 MatGetPetscMaps_Petsc, 1704 0, 1705 /*65*/ 0, 1706 0, 1707 0, 1708 0, 1709 0, 1710 /*70*/ 0, 1711 0, 1712 MatSetColoring_MPIAIJ, 1713 #if defined(PETSC_HAVE_ADIC) 1714 MatSetValuesAdic_MPIAIJ, 1715 #else 1716 0, 1717 #endif 1718 MatSetValuesAdifor_MPIAIJ, 1719 /*75*/ 0, 1720 0, 1721 0, 1722 0, 1723 0, 1724 /*80*/ 0, 1725 0, 1726 0, 1727 0, 1728 /*84*/ MatLoad_MPIAIJ, 1729 0, 1730 0, 1731 0, 1732 0, 1733 0, 1734 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1735 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1736 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1737 MatPtAP_Basic, 1738 MatPtAPSymbolic_MPIAIJ, 1739 /*95*/ MatPtAPNumeric_MPIAIJ, 1740 0, 1741 0, 1742 0, 1743 0, 1744 /*100*/0, 1745 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1746 MatPtAPNumeric_MPIAIJ_MPIAIJ, 1747 }; 1748 1749 /* ----------------------------------------------------------------------------------------*/ 1750 1751 EXTERN_C_BEGIN 1752 #undef __FUNCT__ 1753 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1754 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat) 1755 { 1756 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1757 PetscErrorCode ierr; 1758 1759 PetscFunctionBegin; 1760 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1761 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1762 PetscFunctionReturn(0); 1763 } 1764 EXTERN_C_END 1765 1766 EXTERN_C_BEGIN 1767 #undef __FUNCT__ 1768 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1769 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat) 1770 { 1771 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1772 PetscErrorCode ierr; 1773 1774 PetscFunctionBegin; 1775 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1776 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1777 PetscFunctionReturn(0); 1778 } 1779 EXTERN_C_END 1780 1781 #include "petscpc.h" 1782 EXTERN_C_BEGIN 1783 #undef __FUNCT__ 1784 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1785 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1786 { 1787 Mat_MPIAIJ *b; 1788 PetscErrorCode ierr; 1789 PetscInt i; 1790 1791 PetscFunctionBegin; 1792 B->preallocated = PETSC_TRUE; 1793 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1794 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1795 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1796 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1797 if (d_nnz) { 1798 for (i=0; i<B->m; i++) { 1799 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1800 } 1801 } 1802 if (o_nnz) { 1803 for (i=0; i<B->m; i++) { 1804 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1805 } 1806 } 1807 b = (Mat_MPIAIJ*)B->data; 1808 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1809 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1810 1811 PetscFunctionReturn(0); 1812 } 1813 EXTERN_C_END 1814 1815 #undef __FUNCT__ 1816 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1817 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1818 { 1819 Mat mat; 1820 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1821 PetscErrorCode ierr; 1822 1823 PetscFunctionBegin; 1824 *newmat = 0; 1825 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1826 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1827 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1828 a = (Mat_MPIAIJ*)mat->data; 1829 1830 mat->factor = matin->factor; 1831 mat->bs = matin->bs; 1832 mat->assembled = PETSC_TRUE; 1833 mat->insertmode = NOT_SET_VALUES; 1834 mat->preallocated = PETSC_TRUE; 1835 1836 a->rstart = oldmat->rstart; 1837 a->rend = oldmat->rend; 1838 a->cstart = oldmat->cstart; 1839 a->cend = oldmat->cend; 1840 a->size = oldmat->size; 1841 a->rank = oldmat->rank; 1842 a->donotstash = oldmat->donotstash; 1843 a->roworiented = oldmat->roworiented; 1844 a->rowindices = 0; 1845 a->rowvalues = 0; 1846 a->getrowactive = PETSC_FALSE; 1847 1848 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 1849 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1850 if (oldmat->colmap) { 1851 #if defined (PETSC_USE_CTABLE) 1852 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1853 #else 1854 ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1855 ierr = PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1856 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1857 #endif 1858 } else a->colmap = 0; 1859 if (oldmat->garray) { 1860 PetscInt len; 1861 len = oldmat->B->n; 1862 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1863 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 1864 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 1865 } else a->garray = 0; 1866 1867 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1868 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 1869 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1870 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 1871 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1872 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1873 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1874 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1875 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1876 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 1877 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1878 *newmat = mat; 1879 PetscFunctionReturn(0); 1880 } 1881 1882 #include "petscsys.h" 1883 1884 #undef __FUNCT__ 1885 #define __FUNCT__ "MatLoad_MPIAIJ" 1886 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 1887 { 1888 Mat A; 1889 PetscScalar *vals,*svals; 1890 MPI_Comm comm = ((PetscObject)viewer)->comm; 1891 MPI_Status status; 1892 PetscErrorCode ierr; 1893 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 1894 PetscInt i,nz,j,rstart,rend,mmax; 1895 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 1896 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1897 PetscInt cend,cstart,n,*rowners; 1898 int fd; 1899 1900 PetscFunctionBegin; 1901 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1902 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1903 if (!rank) { 1904 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1905 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1906 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1907 } 1908 1909 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1910 M = header[1]; N = header[2]; 1911 /* determine ownership of all rows */ 1912 m = M/size + ((M % size) > rank); 1913 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1914 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1915 1916 /* First process needs enough room for process with most rows */ 1917 if (!rank) { 1918 mmax = rowners[1]; 1919 for (i=2; i<size; i++) { 1920 mmax = PetscMax(mmax,rowners[i]); 1921 } 1922 } else mmax = m; 1923 1924 rowners[0] = 0; 1925 for (i=2; i<=size; i++) { 1926 mmax = PetscMax(mmax,rowners[i]); 1927 rowners[i] += rowners[i-1]; 1928 } 1929 rstart = rowners[rank]; 1930 rend = rowners[rank+1]; 1931 1932 /* distribute row lengths to all processors */ 1933 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 1934 if (!rank) { 1935 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 1936 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1937 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 1938 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1939 for (j=0; j<m; j++) { 1940 procsnz[0] += ourlens[j]; 1941 } 1942 for (i=1; i<size; i++) { 1943 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 1944 /* calculate the number of nonzeros on each processor */ 1945 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 1946 procsnz[i] += rowlengths[j]; 1947 } 1948 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1949 } 1950 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1951 } else { 1952 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1953 } 1954 1955 if (!rank) { 1956 /* determine max buffer needed and allocate it */ 1957 maxnz = 0; 1958 for (i=0; i<size; i++) { 1959 maxnz = PetscMax(maxnz,procsnz[i]); 1960 } 1961 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1962 1963 /* read in my part of the matrix column indices */ 1964 nz = procsnz[0]; 1965 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1966 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1967 1968 /* read in every one elses and ship off */ 1969 for (i=1; i<size; i++) { 1970 nz = procsnz[i]; 1971 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1972 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1973 } 1974 ierr = PetscFree(cols);CHKERRQ(ierr); 1975 } else { 1976 /* determine buffer space needed for message */ 1977 nz = 0; 1978 for (i=0; i<m; i++) { 1979 nz += ourlens[i]; 1980 } 1981 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1982 1983 /* receive message of column indices*/ 1984 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1985 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1986 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1987 } 1988 1989 /* determine column ownership if matrix is not square */ 1990 if (N != M) { 1991 n = N/size + ((N % size) > rank); 1992 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1993 cstart = cend - n; 1994 } else { 1995 cstart = rstart; 1996 cend = rend; 1997 n = cend - cstart; 1998 } 1999 2000 /* loop over local rows, determining number of off diagonal entries */ 2001 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2002 jj = 0; 2003 for (i=0; i<m; i++) { 2004 for (j=0; j<ourlens[i]; j++) { 2005 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2006 jj++; 2007 } 2008 } 2009 2010 /* create our matrix */ 2011 for (i=0; i<m; i++) { 2012 ourlens[i] -= offlens[i]; 2013 } 2014 ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr); 2015 ierr = MatSetType(A,type);CHKERRQ(ierr); 2016 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2017 2018 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2019 for (i=0; i<m; i++) { 2020 ourlens[i] += offlens[i]; 2021 } 2022 2023 if (!rank) { 2024 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2025 2026 /* read in my part of the matrix numerical values */ 2027 nz = procsnz[0]; 2028 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2029 2030 /* insert into matrix */ 2031 jj = rstart; 2032 smycols = mycols; 2033 svals = vals; 2034 for (i=0; i<m; i++) { 2035 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2036 smycols += ourlens[i]; 2037 svals += ourlens[i]; 2038 jj++; 2039 } 2040 2041 /* read in other processors and ship out */ 2042 for (i=1; i<size; i++) { 2043 nz = procsnz[i]; 2044 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2045 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2046 } 2047 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2048 } else { 2049 /* receive numeric values */ 2050 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2051 2052 /* receive message of values*/ 2053 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2054 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2055 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2056 2057 /* insert into matrix */ 2058 jj = rstart; 2059 smycols = mycols; 2060 svals = vals; 2061 for (i=0; i<m; i++) { 2062 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2063 smycols += ourlens[i]; 2064 svals += ourlens[i]; 2065 jj++; 2066 } 2067 } 2068 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2069 ierr = PetscFree(vals);CHKERRQ(ierr); 2070 ierr = PetscFree(mycols);CHKERRQ(ierr); 2071 ierr = PetscFree(rowners);CHKERRQ(ierr); 2072 2073 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2074 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2075 *newmat = A; 2076 PetscFunctionReturn(0); 2077 } 2078 2079 #undef __FUNCT__ 2080 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2081 /* 2082 Not great since it makes two copies of the submatrix, first an SeqAIJ 2083 in local and then by concatenating the local matrices the end result. 2084 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2085 */ 2086 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2087 { 2088 PetscErrorCode ierr; 2089 PetscMPIInt rank,size; 2090 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2091 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2092 Mat *local,M,Mreuse; 2093 PetscScalar *vwork,*aa; 2094 MPI_Comm comm = mat->comm; 2095 Mat_SeqAIJ *aij; 2096 2097 2098 PetscFunctionBegin; 2099 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2100 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2101 2102 if (call == MAT_REUSE_MATRIX) { 2103 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2104 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2105 local = &Mreuse; 2106 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2107 } else { 2108 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2109 Mreuse = *local; 2110 ierr = PetscFree(local);CHKERRQ(ierr); 2111 } 2112 2113 /* 2114 m - number of local rows 2115 n - number of columns (same on all processors) 2116 rstart - first row in new global matrix generated 2117 */ 2118 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2119 if (call == MAT_INITIAL_MATRIX) { 2120 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2121 ii = aij->i; 2122 jj = aij->j; 2123 2124 /* 2125 Determine the number of non-zeros in the diagonal and off-diagonal 2126 portions of the matrix in order to do correct preallocation 2127 */ 2128 2129 /* first get start and end of "diagonal" columns */ 2130 if (csize == PETSC_DECIDE) { 2131 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2132 if (mglobal == n) { /* square matrix */ 2133 nlocal = m; 2134 } else { 2135 nlocal = n/size + ((n % size) > rank); 2136 } 2137 } else { 2138 nlocal = csize; 2139 } 2140 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2141 rstart = rend - nlocal; 2142 if (rank == size - 1 && rend != n) { 2143 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2144 } 2145 2146 /* next, compute all the lengths */ 2147 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2148 olens = dlens + m; 2149 for (i=0; i<m; i++) { 2150 jend = ii[i+1] - ii[i]; 2151 olen = 0; 2152 dlen = 0; 2153 for (j=0; j<jend; j++) { 2154 if (*jj < rstart || *jj >= rend) olen++; 2155 else dlen++; 2156 jj++; 2157 } 2158 olens[i] = olen; 2159 dlens[i] = dlen; 2160 } 2161 ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr); 2162 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2163 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2164 ierr = PetscFree(dlens);CHKERRQ(ierr); 2165 } else { 2166 PetscInt ml,nl; 2167 2168 M = *newmat; 2169 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2170 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2171 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2172 /* 2173 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2174 rather than the slower MatSetValues(). 2175 */ 2176 M->was_assembled = PETSC_TRUE; 2177 M->assembled = PETSC_FALSE; 2178 } 2179 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2180 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2181 ii = aij->i; 2182 jj = aij->j; 2183 aa = aij->a; 2184 for (i=0; i<m; i++) { 2185 row = rstart + i; 2186 nz = ii[i+1] - ii[i]; 2187 cwork = jj; jj += nz; 2188 vwork = aa; aa += nz; 2189 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2190 } 2191 2192 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2193 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2194 *newmat = M; 2195 2196 /* save submatrix used in processor for next request */ 2197 if (call == MAT_INITIAL_MATRIX) { 2198 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2199 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2200 } 2201 2202 PetscFunctionReturn(0); 2203 } 2204 2205 EXTERN_C_BEGIN 2206 #undef __FUNCT__ 2207 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2208 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2209 { 2210 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2211 PetscInt m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d; 2212 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii; 2213 const PetscInt *JJ; 2214 PetscScalar *values; 2215 PetscErrorCode ierr; 2216 2217 PetscFunctionBegin; 2218 #if defined(PETSC_OPT_g) 2219 if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]); 2220 #endif 2221 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2222 o_nnz = d_nnz + m; 2223 2224 for (i=0; i<m; i++) { 2225 nnz = I[i+1]- I[i]; 2226 JJ = J + I[i]; 2227 nnz_max = PetscMax(nnz_max,nnz); 2228 #if defined(PETSC_OPT_g) 2229 if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 2230 #endif 2231 for (j=0; j<nnz; j++) { 2232 if (*JJ >= cstart) break; 2233 JJ++; 2234 } 2235 d = 0; 2236 for (; j<nnz; j++) { 2237 if (*JJ++ >= cend) break; 2238 d++; 2239 } 2240 d_nnz[i] = d; 2241 o_nnz[i] = nnz - d; 2242 } 2243 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2244 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2245 2246 if (v) values = (PetscScalar*)v; 2247 else { 2248 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2249 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2250 } 2251 2252 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2253 for (i=0; i<m; i++) { 2254 ii = i + rstart; 2255 nnz = I[i+1]- I[i]; 2256 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr); 2257 } 2258 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2259 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2260 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2261 2262 if (!v) { 2263 ierr = PetscFree(values);CHKERRQ(ierr); 2264 } 2265 PetscFunctionReturn(0); 2266 } 2267 EXTERN_C_END 2268 2269 #undef __FUNCT__ 2270 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2271 /*@C 2272 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2273 (the default parallel PETSc format). 2274 2275 Collective on MPI_Comm 2276 2277 Input Parameters: 2278 + A - the matrix 2279 . i - the indices into j for the start of each local row (starts with zero) 2280 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2281 - v - optional values in the matrix 2282 2283 Level: developer 2284 2285 .keywords: matrix, aij, compressed row, sparse, parallel 2286 2287 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2288 @*/ 2289 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2290 { 2291 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2292 2293 PetscFunctionBegin; 2294 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2295 if (f) { 2296 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2297 } 2298 PetscFunctionReturn(0); 2299 } 2300 2301 #undef __FUNCT__ 2302 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2303 /*@C 2304 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2305 (the default parallel PETSc format). For good matrix assembly performance 2306 the user should preallocate the matrix storage by setting the parameters 2307 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2308 performance can be increased by more than a factor of 50. 2309 2310 Collective on MPI_Comm 2311 2312 Input Parameters: 2313 + A - the matrix 2314 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2315 (same value is used for all local rows) 2316 . d_nnz - array containing the number of nonzeros in the various rows of the 2317 DIAGONAL portion of the local submatrix (possibly different for each row) 2318 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2319 The size of this array is equal to the number of local rows, i.e 'm'. 2320 You must leave room for the diagonal entry even if it is zero. 2321 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2322 submatrix (same value is used for all local rows). 2323 - o_nnz - array containing the number of nonzeros in the various rows of the 2324 OFF-DIAGONAL portion of the local submatrix (possibly different for 2325 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2326 structure. The size of this array is equal to the number 2327 of local rows, i.e 'm'. 2328 2329 If the *_nnz parameter is given then the *_nz parameter is ignored 2330 2331 The AIJ format (also called the Yale sparse matrix format or 2332 compressed row storage (CSR)), is fully compatible with standard Fortran 77 2333 storage. The stored row and column indices begin with zero. See the users manual for details. 2334 2335 The parallel matrix is partitioned such that the first m0 rows belong to 2336 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2337 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2338 2339 The DIAGONAL portion of the local submatrix of a processor can be defined 2340 as the submatrix which is obtained by extraction the part corresponding 2341 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2342 first row that belongs to the processor, and r2 is the last row belonging 2343 to the this processor. This is a square mxm matrix. The remaining portion 2344 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2345 2346 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2347 2348 Example usage: 2349 2350 Consider the following 8x8 matrix with 34 non-zero values, that is 2351 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2352 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2353 as follows: 2354 2355 .vb 2356 1 2 0 | 0 3 0 | 0 4 2357 Proc0 0 5 6 | 7 0 0 | 8 0 2358 9 0 10 | 11 0 0 | 12 0 2359 ------------------------------------- 2360 13 0 14 | 15 16 17 | 0 0 2361 Proc1 0 18 0 | 19 20 21 | 0 0 2362 0 0 0 | 22 23 0 | 24 0 2363 ------------------------------------- 2364 Proc2 25 26 27 | 0 0 28 | 29 0 2365 30 0 0 | 31 32 33 | 0 34 2366 .ve 2367 2368 This can be represented as a collection of submatrices as: 2369 2370 .vb 2371 A B C 2372 D E F 2373 G H I 2374 .ve 2375 2376 Where the submatrices A,B,C are owned by proc0, D,E,F are 2377 owned by proc1, G,H,I are owned by proc2. 2378 2379 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2380 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2381 The 'M','N' parameters are 8,8, and have the same values on all procs. 2382 2383 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2384 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2385 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2386 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2387 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2388 matrix, ans [DF] as another SeqAIJ matrix. 2389 2390 When d_nz, o_nz parameters are specified, d_nz storage elements are 2391 allocated for every row of the local diagonal submatrix, and o_nz 2392 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2393 One way to choose d_nz and o_nz is to use the max nonzerors per local 2394 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2395 In this case, the values of d_nz,o_nz are: 2396 .vb 2397 proc0 : dnz = 2, o_nz = 2 2398 proc1 : dnz = 3, o_nz = 2 2399 proc2 : dnz = 1, o_nz = 4 2400 .ve 2401 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2402 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2403 for proc3. i.e we are using 12+15+10=37 storage locations to store 2404 34 values. 2405 2406 When d_nnz, o_nnz parameters are specified, the storage is specified 2407 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2408 In the above case the values for d_nnz,o_nnz are: 2409 .vb 2410 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2411 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2412 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2413 .ve 2414 Here the space allocated is sum of all the above values i.e 34, and 2415 hence pre-allocation is perfect. 2416 2417 Level: intermediate 2418 2419 .keywords: matrix, aij, compressed row, sparse, parallel 2420 2421 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 2422 MPIAIJ 2423 @*/ 2424 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2425 { 2426 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2427 2428 PetscFunctionBegin; 2429 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2430 if (f) { 2431 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2432 } 2433 PetscFunctionReturn(0); 2434 } 2435 2436 #undef __FUNCT__ 2437 #define __FUNCT__ "MatCreateMPIAIJ" 2438 /*@C 2439 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2440 (the default parallel PETSc format). For good matrix assembly performance 2441 the user should preallocate the matrix storage by setting the parameters 2442 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2443 performance can be increased by more than a factor of 50. 2444 2445 Collective on MPI_Comm 2446 2447 Input Parameters: 2448 + comm - MPI communicator 2449 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2450 This value should be the same as the local size used in creating the 2451 y vector for the matrix-vector product y = Ax. 2452 . n - This value should be the same as the local size used in creating the 2453 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2454 calculated if N is given) For square matrices n is almost always m. 2455 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2456 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2457 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2458 (same value is used for all local rows) 2459 . d_nnz - array containing the number of nonzeros in the various rows of the 2460 DIAGONAL portion of the local submatrix (possibly different for each row) 2461 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2462 The size of this array is equal to the number of local rows, i.e 'm'. 2463 You must leave room for the diagonal entry even if it is zero. 2464 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2465 submatrix (same value is used for all local rows). 2466 - o_nnz - array containing the number of nonzeros in the various rows of the 2467 OFF-DIAGONAL portion of the local submatrix (possibly different for 2468 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2469 structure. The size of this array is equal to the number 2470 of local rows, i.e 'm'. 2471 2472 Output Parameter: 2473 . A - the matrix 2474 2475 Notes: 2476 If the *_nnz parameter is given then the *_nz parameter is ignored 2477 2478 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2479 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2480 storage requirements for this matrix. 2481 2482 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2483 processor than it must be used on all processors that share the object for 2484 that argument. 2485 2486 The user MUST specify either the local or global matrix dimensions 2487 (possibly both). 2488 2489 The parallel matrix is partitioned such that the first m0 rows belong to 2490 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2491 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2492 2493 The DIAGONAL portion of the local submatrix of a processor can be defined 2494 as the submatrix which is obtained by extraction the part corresponding 2495 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2496 first row that belongs to the processor, and r2 is the last row belonging 2497 to the this processor. This is a square mxm matrix. The remaining portion 2498 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2499 2500 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2501 2502 When calling this routine with a single process communicator, a matrix of 2503 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2504 type of communicator, use the construction mechanism: 2505 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 2506 2507 By default, this format uses inodes (identical nodes) when possible. 2508 We search for consecutive rows with the same nonzero structure, thereby 2509 reusing matrix information to achieve increased efficiency. 2510 2511 Options Database Keys: 2512 + -mat_no_inode - Do not use inodes 2513 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 2514 - -mat_aij_oneindex - Internally use indexing starting at 1 2515 rather than 0. Note that when calling MatSetValues(), 2516 the user still MUST index entries starting at 0! 2517 2518 2519 Example usage: 2520 2521 Consider the following 8x8 matrix with 34 non-zero values, that is 2522 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2523 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2524 as follows: 2525 2526 .vb 2527 1 2 0 | 0 3 0 | 0 4 2528 Proc0 0 5 6 | 7 0 0 | 8 0 2529 9 0 10 | 11 0 0 | 12 0 2530 ------------------------------------- 2531 13 0 14 | 15 16 17 | 0 0 2532 Proc1 0 18 0 | 19 20 21 | 0 0 2533 0 0 0 | 22 23 0 | 24 0 2534 ------------------------------------- 2535 Proc2 25 26 27 | 0 0 28 | 29 0 2536 30 0 0 | 31 32 33 | 0 34 2537 .ve 2538 2539 This can be represented as a collection of submatrices as: 2540 2541 .vb 2542 A B C 2543 D E F 2544 G H I 2545 .ve 2546 2547 Where the submatrices A,B,C are owned by proc0, D,E,F are 2548 owned by proc1, G,H,I are owned by proc2. 2549 2550 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2551 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2552 The 'M','N' parameters are 8,8, and have the same values on all procs. 2553 2554 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2555 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2556 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2557 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2558 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2559 matrix, ans [DF] as another SeqAIJ matrix. 2560 2561 When d_nz, o_nz parameters are specified, d_nz storage elements are 2562 allocated for every row of the local diagonal submatrix, and o_nz 2563 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2564 One way to choose d_nz and o_nz is to use the max nonzerors per local 2565 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2566 In this case, the values of d_nz,o_nz are: 2567 .vb 2568 proc0 : dnz = 2, o_nz = 2 2569 proc1 : dnz = 3, o_nz = 2 2570 proc2 : dnz = 1, o_nz = 4 2571 .ve 2572 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2573 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2574 for proc3. i.e we are using 12+15+10=37 storage locations to store 2575 34 values. 2576 2577 When d_nnz, o_nnz parameters are specified, the storage is specified 2578 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2579 In the above case the values for d_nnz,o_nnz are: 2580 .vb 2581 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2582 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2583 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2584 .ve 2585 Here the space allocated is sum of all the above values i.e 34, and 2586 hence pre-allocation is perfect. 2587 2588 Level: intermediate 2589 2590 .keywords: matrix, aij, compressed row, sparse, parallel 2591 2592 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2593 MPIAIJ 2594 @*/ 2595 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2596 { 2597 PetscErrorCode ierr; 2598 PetscMPIInt size; 2599 2600 PetscFunctionBegin; 2601 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2602 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2603 if (size > 1) { 2604 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2605 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2606 } else { 2607 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2608 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2609 } 2610 PetscFunctionReturn(0); 2611 } 2612 2613 #undef __FUNCT__ 2614 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2615 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2616 { 2617 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2618 2619 PetscFunctionBegin; 2620 *Ad = a->A; 2621 *Ao = a->B; 2622 *colmap = a->garray; 2623 PetscFunctionReturn(0); 2624 } 2625 2626 #undef __FUNCT__ 2627 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2628 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2629 { 2630 PetscErrorCode ierr; 2631 PetscInt i; 2632 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2633 2634 PetscFunctionBegin; 2635 if (coloring->ctype == IS_COLORING_LOCAL) { 2636 ISColoringValue *allcolors,*colors; 2637 ISColoring ocoloring; 2638 2639 /* set coloring for diagonal portion */ 2640 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2641 2642 /* set coloring for off-diagonal portion */ 2643 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2644 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2645 for (i=0; i<a->B->n; i++) { 2646 colors[i] = allcolors[a->garray[i]]; 2647 } 2648 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2649 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2650 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2651 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2652 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2653 ISColoringValue *colors; 2654 PetscInt *larray; 2655 ISColoring ocoloring; 2656 2657 /* set coloring for diagonal portion */ 2658 ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2659 for (i=0; i<a->A->n; i++) { 2660 larray[i] = i + a->cstart; 2661 } 2662 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2663 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2664 for (i=0; i<a->A->n; i++) { 2665 colors[i] = coloring->colors[larray[i]]; 2666 } 2667 ierr = PetscFree(larray);CHKERRQ(ierr); 2668 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2669 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2670 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2671 2672 /* set coloring for off-diagonal portion */ 2673 ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2674 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2675 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2676 for (i=0; i<a->B->n; i++) { 2677 colors[i] = coloring->colors[larray[i]]; 2678 } 2679 ierr = PetscFree(larray);CHKERRQ(ierr); 2680 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2681 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2682 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2683 } else { 2684 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 2685 } 2686 2687 PetscFunctionReturn(0); 2688 } 2689 2690 #if defined(PETSC_HAVE_ADIC) 2691 #undef __FUNCT__ 2692 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2693 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2694 { 2695 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2696 PetscErrorCode ierr; 2697 2698 PetscFunctionBegin; 2699 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2700 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2701 PetscFunctionReturn(0); 2702 } 2703 #endif 2704 2705 #undef __FUNCT__ 2706 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2707 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 2708 { 2709 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2710 PetscErrorCode ierr; 2711 2712 PetscFunctionBegin; 2713 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2714 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2715 PetscFunctionReturn(0); 2716 } 2717 2718 #undef __FUNCT__ 2719 #define __FUNCT__ "MatMerge" 2720 /*@C 2721 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2722 matrices from each processor 2723 2724 Collective on MPI_Comm 2725 2726 Input Parameters: 2727 + comm - the communicators the parallel matrix will live on 2728 . inmat - the input sequential matrices 2729 . n - number of local columns (or PETSC_DECIDE) 2730 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2731 2732 Output Parameter: 2733 . outmat - the parallel matrix generated 2734 2735 Level: advanced 2736 2737 Notes: The number of columns of the matrix in EACH processor MUST be the same. 2738 2739 @*/ 2740 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2741 { 2742 PetscErrorCode ierr; 2743 PetscInt m,N,i,rstart,nnz,I,*dnz,*onz; 2744 PetscInt *indx; 2745 PetscScalar *values; 2746 PetscMap columnmap,rowmap; 2747 2748 PetscFunctionBegin; 2749 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2750 /* 2751 PetscMPIInt rank; 2752 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2753 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N); 2754 */ 2755 if (scall == MAT_INITIAL_MATRIX){ 2756 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2757 if (n == PETSC_DECIDE){ 2758 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2759 ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr); 2760 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2761 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2762 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2763 } 2764 2765 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2766 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2767 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2768 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2769 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2770 2771 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2772 for (i=0;i<m;i++) { 2773 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2774 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2775 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2776 } 2777 /* This routine will ONLY return MPIAIJ type matrix */ 2778 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr); 2779 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2780 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2781 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2782 2783 } else if (scall == MAT_REUSE_MATRIX){ 2784 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2785 } else { 2786 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 2787 } 2788 2789 for (i=0;i<m;i++) { 2790 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2791 I = i + rstart; 2792 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2793 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2794 } 2795 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2796 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2797 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2798 2799 PetscFunctionReturn(0); 2800 } 2801 2802 #undef __FUNCT__ 2803 #define __FUNCT__ "MatFileSplit" 2804 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2805 { 2806 PetscErrorCode ierr; 2807 PetscMPIInt rank; 2808 PetscInt m,N,i,rstart,nnz; 2809 size_t len; 2810 const PetscInt *indx; 2811 PetscViewer out; 2812 char *name; 2813 Mat B; 2814 const PetscScalar *values; 2815 2816 PetscFunctionBegin; 2817 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2818 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2819 /* Should this be the type of the diagonal block of A? */ 2820 ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr); 2821 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2822 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2823 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2824 for (i=0;i<m;i++) { 2825 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2826 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2827 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2828 } 2829 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2830 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2831 2832 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2833 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2834 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2835 sprintf(name,"%s.%d",outfile,rank); 2836 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr); 2837 ierr = PetscFree(name); 2838 ierr = MatView(B,out);CHKERRQ(ierr); 2839 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2840 ierr = MatDestroy(B);CHKERRQ(ierr); 2841 PetscFunctionReturn(0); 2842 } 2843 2844 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 2845 #undef __FUNCT__ 2846 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 2847 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 2848 { 2849 PetscErrorCode ierr; 2850 Mat_Merge_SeqsToMPI *merge; 2851 PetscObjectContainer container; 2852 2853 PetscFunctionBegin; 2854 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2855 if (container) { 2856 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2857 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 2858 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 2859 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 2860 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 2861 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 2862 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 2863 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 2864 ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr); 2865 if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);} 2866 if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);} 2867 if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);} 2868 2869 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 2870 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 2871 } 2872 ierr = PetscFree(merge);CHKERRQ(ierr); 2873 2874 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 2875 PetscFunctionReturn(0); 2876 } 2877 2878 #include "src/mat/utils/freespace.h" 2879 #include "petscbt.h" 2880 static PetscEvent logkey_seqstompinum = 0; 2881 #undef __FUNCT__ 2882 #define __FUNCT__ "MatMerge_SeqsToMPINumeric" 2883 /*@C 2884 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 2885 matrices from each processor 2886 2887 Collective on MPI_Comm 2888 2889 Input Parameters: 2890 + comm - the communicators the parallel matrix will live on 2891 . seqmat - the input sequential matrices 2892 . m - number of local rows (or PETSC_DECIDE) 2893 . n - number of local columns (or PETSC_DECIDE) 2894 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2895 2896 Output Parameter: 2897 . mpimat - the parallel matrix generated 2898 2899 Level: advanced 2900 2901 Notes: 2902 The dimensions of the sequential matrix in each processor MUST be the same. 2903 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 2904 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 2905 @*/ 2906 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 2907 { 2908 PetscErrorCode ierr; 2909 MPI_Comm comm=mpimat->comm; 2910 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2911 PetscMPIInt size,rank,taga,*len_s; 2912 PetscInt N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j; 2913 PetscInt proc,m; 2914 PetscInt **buf_ri,**buf_rj; 2915 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 2916 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 2917 MPI_Request *s_waits,*r_waits; 2918 MPI_Status *status; 2919 MatScalar *aa=a->a,**abuf_r,*ba_i; 2920 Mat_Merge_SeqsToMPI *merge; 2921 PetscObjectContainer container; 2922 2923 PetscFunctionBegin; 2924 if (!logkey_seqstompinum) { 2925 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 2926 } 2927 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 2928 2929 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2930 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2931 2932 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2933 if (container) { 2934 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2935 } 2936 bi = merge->bi; 2937 bj = merge->bj; 2938 buf_ri = merge->buf_ri; 2939 buf_rj = merge->buf_rj; 2940 2941 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 2942 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 2943 len_s = merge->len_s; 2944 2945 /* send and recv matrix values */ 2946 /*-----------------------------*/ 2947 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 2948 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 2949 2950 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 2951 for (proc=0,k=0; proc<size; proc++){ 2952 if (!len_s[proc]) continue; 2953 i = owners[proc]; 2954 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 2955 k++; 2956 } 2957 2958 ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr); 2959 ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr); 2960 ierr = PetscFree(status);CHKERRQ(ierr); 2961 2962 ierr = PetscFree(s_waits);CHKERRQ(ierr); 2963 ierr = PetscFree(r_waits);CHKERRQ(ierr); 2964 2965 /* insert mat values of mpimat */ 2966 /*----------------------------*/ 2967 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 2968 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 2969 nextrow = buf_ri_k + merge->nrecv; 2970 nextai = nextrow + merge->nrecv; 2971 2972 for (k=0; k<merge->nrecv; k++){ 2973 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 2974 nrows = *(buf_ri_k[k]); 2975 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 2976 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 2977 } 2978 2979 /* set values of ba */ 2980 ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); 2981 for (i=0; i<m; i++) { 2982 arow = owners[rank] + i; 2983 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 2984 bnzi = bi[i+1] - bi[i]; 2985 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 2986 2987 /* add local non-zero vals of this proc's seqmat into ba */ 2988 anzi = ai[arow+1] - ai[arow]; 2989 aj = a->j + ai[arow]; 2990 aa = a->a + ai[arow]; 2991 nextaj = 0; 2992 for (j=0; nextaj<anzi; j++){ 2993 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 2994 ba_i[j] += aa[nextaj++]; 2995 } 2996 } 2997 2998 /* add received vals into ba */ 2999 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3000 /* i-th row */ 3001 if (i == *nextrow[k]) { 3002 anzi = *(nextai[k]+1) - *nextai[k]; 3003 aj = buf_rj[k] + *(nextai[k]); 3004 aa = abuf_r[k] + *(nextai[k]); 3005 nextaj = 0; 3006 for (j=0; nextaj<anzi; j++){ 3007 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3008 ba_i[j] += aa[nextaj++]; 3009 } 3010 } 3011 nextrow[k]++; nextai[k]++; 3012 } 3013 } 3014 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3015 } 3016 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3017 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3018 3019 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3020 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3021 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3022 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3023 PetscFunctionReturn(0); 3024 } 3025 3026 static PetscEvent logkey_seqstompisym = 0; 3027 #undef __FUNCT__ 3028 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic" 3029 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3030 { 3031 PetscErrorCode ierr; 3032 Mat B_mpi; 3033 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3034 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3035 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3036 PetscInt M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j; 3037 PetscInt len,proc,*dnz,*onz; 3038 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3039 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3040 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3041 MPI_Status *status; 3042 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3043 PetscBT lnkbt; 3044 Mat_Merge_SeqsToMPI *merge; 3045 PetscObjectContainer container; 3046 3047 PetscFunctionBegin; 3048 if (!logkey_seqstompisym) { 3049 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3050 } 3051 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3052 3053 /* make sure it is a PETSc comm */ 3054 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 3055 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3056 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3057 3058 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3059 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3060 3061 /* determine row ownership */ 3062 /*---------------------------------------------------------*/ 3063 ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr); 3064 if (m == PETSC_DECIDE) { 3065 ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr); 3066 } else { 3067 ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 3068 } 3069 ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr); 3070 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3071 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3072 3073 if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); } 3074 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 3075 3076 /* determine the number of messages to send, their lengths */ 3077 /*---------------------------------------------------------*/ 3078 len_s = merge->len_s; 3079 3080 len = 0; /* length of buf_si[] */ 3081 merge->nsend = 0; 3082 for (proc=0; proc<size; proc++){ 3083 len_si[proc] = 0; 3084 if (proc == rank){ 3085 len_s[proc] = 0; 3086 } else { 3087 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3088 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3089 } 3090 if (len_s[proc]) { 3091 merge->nsend++; 3092 nrows = 0; 3093 for (i=owners[proc]; i<owners[proc+1]; i++){ 3094 if (ai[i+1] > ai[i]) nrows++; 3095 } 3096 len_si[proc] = 2*(nrows+1); 3097 len += len_si[proc]; 3098 } 3099 } 3100 3101 /* determine the number and length of messages to receive for ij-structure */ 3102 /*-------------------------------------------------------------------------*/ 3103 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3104 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3105 3106 /* post the Irecv of j-structure */ 3107 /*-------------------------------*/ 3108 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr); 3109 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3110 3111 /* post the Isend of j-structure */ 3112 /*--------------------------------*/ 3113 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3114 sj_waits = si_waits + merge->nsend; 3115 3116 for (proc=0, k=0; proc<size; proc++){ 3117 if (!len_s[proc]) continue; 3118 i = owners[proc]; 3119 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3120 k++; 3121 } 3122 3123 /* receives and sends of j-structure are complete */ 3124 /*------------------------------------------------*/ 3125 ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr); 3126 ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr); 3127 3128 /* send and recv i-structure */ 3129 /*---------------------------*/ 3130 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr); 3131 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3132 3133 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3134 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3135 for (proc=0,k=0; proc<size; proc++){ 3136 if (!len_s[proc]) continue; 3137 /* form outgoing message for i-structure: 3138 buf_si[0]: nrows to be sent 3139 [1:nrows]: row index (global) 3140 [nrows+1:2*nrows+1]: i-structure index 3141 */ 3142 /*-------------------------------------------*/ 3143 nrows = len_si[proc]/2 - 1; 3144 buf_si_i = buf_si + nrows+1; 3145 buf_si[0] = nrows; 3146 buf_si_i[0] = 0; 3147 nrows = 0; 3148 for (i=owners[proc]; i<owners[proc+1]; i++){ 3149 anzi = ai[i+1] - ai[i]; 3150 if (anzi) { 3151 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3152 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3153 nrows++; 3154 } 3155 } 3156 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3157 k++; 3158 buf_si += len_si[proc]; 3159 } 3160 3161 ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr); 3162 ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr); 3163 3164 ierr = PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv));CHKERRQ(ierr); 3165 for (i=0; i<merge->nrecv; i++){ 3166 ierr = PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI: recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));CHKERRQ(ierr); 3167 } 3168 3169 ierr = PetscFree(len_si);CHKERRQ(ierr); 3170 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3171 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 3172 ierr = PetscFree(si_waits);CHKERRQ(ierr); 3173 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3174 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3175 ierr = PetscFree(status);CHKERRQ(ierr); 3176 3177 /* compute a local seq matrix in each processor */ 3178 /*----------------------------------------------*/ 3179 /* allocate bi array and free space for accumulating nonzero column info */ 3180 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3181 bi[0] = 0; 3182 3183 /* create and initialize a linked list */ 3184 nlnk = N+1; 3185 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3186 3187 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3188 len = 0; 3189 len = ai[owners[rank+1]] - ai[owners[rank]]; 3190 ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3191 current_space = free_space; 3192 3193 /* determine symbolic info for each local row */ 3194 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3195 nextrow = buf_ri_k + merge->nrecv; 3196 nextai = nextrow + merge->nrecv; 3197 for (k=0; k<merge->nrecv; k++){ 3198 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3199 nrows = *buf_ri_k[k]; 3200 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 3201 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3202 } 3203 3204 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3205 len = 0; 3206 for (i=0;i<m;i++) { 3207 bnzi = 0; 3208 /* add local non-zero cols of this proc's seqmat into lnk */ 3209 arow = owners[rank] + i; 3210 anzi = ai[arow+1] - ai[arow]; 3211 aj = a->j + ai[arow]; 3212 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3213 bnzi += nlnk; 3214 /* add received col data into lnk */ 3215 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3216 if (i == *nextrow[k]) { /* i-th row */ 3217 anzi = *(nextai[k]+1) - *nextai[k]; 3218 aj = buf_rj[k] + *nextai[k]; 3219 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3220 bnzi += nlnk; 3221 nextrow[k]++; nextai[k]++; 3222 } 3223 } 3224 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3225 3226 /* if free space is not available, make more free space */ 3227 if (current_space->local_remaining<bnzi) { 3228 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3229 nspacedouble++; 3230 } 3231 /* copy data into free space, then initialize lnk */ 3232 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3233 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3234 3235 current_space->array += bnzi; 3236 current_space->local_used += bnzi; 3237 current_space->local_remaining -= bnzi; 3238 3239 bi[i+1] = bi[i] + bnzi; 3240 } 3241 3242 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3243 3244 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3245 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3246 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3247 3248 /* create symbolic parallel matrix B_mpi */ 3249 /*---------------------------------------*/ 3250 if (n==PETSC_DECIDE) { 3251 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr); 3252 } else { 3253 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr); 3254 } 3255 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3256 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3257 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3258 3259 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3260 B_mpi->assembled = PETSC_FALSE; 3261 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3262 merge->bi = bi; 3263 merge->bj = bj; 3264 merge->buf_ri = buf_ri; 3265 merge->buf_rj = buf_rj; 3266 merge->coi = PETSC_NULL; 3267 merge->coj = PETSC_NULL; 3268 merge->owners_co = PETSC_NULL; 3269 3270 /* attach the supporting struct to B_mpi for reuse */ 3271 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3272 ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr); 3273 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3274 *mpimat = B_mpi; 3275 3276 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 3277 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3278 PetscFunctionReturn(0); 3279 } 3280 3281 static PetscEvent logkey_seqstompi = 0; 3282 #undef __FUNCT__ 3283 #define __FUNCT__ "MatMerge_SeqsToMPI" 3284 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3285 { 3286 PetscErrorCode ierr; 3287 3288 PetscFunctionBegin; 3289 if (!logkey_seqstompi) { 3290 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3291 } 3292 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3293 if (scall == MAT_INITIAL_MATRIX){ 3294 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3295 } 3296 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3297 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3298 PetscFunctionReturn(0); 3299 } 3300 static PetscEvent logkey_getlocalmat = 0; 3301 #undef __FUNCT__ 3302 #define __FUNCT__ "MatGetLocalMat" 3303 /*@C 3304 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3305 3306 Not Collective 3307 3308 Input Parameters: 3309 + A - the matrix 3310 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3311 3312 Output Parameter: 3313 . A_loc - the local sequential matrix generated 3314 3315 Level: developer 3316 3317 @*/ 3318 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3319 { 3320 PetscErrorCode ierr; 3321 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3322 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3323 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3324 PetscScalar *aa=a->a,*ba=b->a,*ca; 3325 PetscInt am=A->m,i,j,k,cstart=mpimat->cstart; 3326 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3327 3328 PetscFunctionBegin; 3329 if (!logkey_getlocalmat) { 3330 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3331 } 3332 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3333 if (scall == MAT_INITIAL_MATRIX){ 3334 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 3335 ci[0] = 0; 3336 for (i=0; i<am; i++){ 3337 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3338 } 3339 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3340 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 3341 k = 0; 3342 for (i=0; i<am; i++) { 3343 ncols_o = bi[i+1] - bi[i]; 3344 ncols_d = ai[i+1] - ai[i]; 3345 /* off-diagonal portion of A */ 3346 for (jo=0; jo<ncols_o; jo++) { 3347 col = cmap[*bj]; 3348 if (col >= cstart) break; 3349 cj[k] = col; bj++; 3350 ca[k++] = *ba++; 3351 } 3352 /* diagonal portion of A */ 3353 for (j=0; j<ncols_d; j++) { 3354 cj[k] = cstart + *aj++; 3355 ca[k++] = *aa++; 3356 } 3357 /* off-diagonal portion of A */ 3358 for (j=jo; j<ncols_o; j++) { 3359 cj[k] = cmap[*bj++]; 3360 ca[k++] = *ba++; 3361 } 3362 } 3363 /* put together the new matrix */ 3364 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 3365 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3366 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3367 mat = (Mat_SeqAIJ*)(*A_loc)->data; 3368 mat->freedata = PETSC_TRUE; 3369 mat->nonew = 0; 3370 } else if (scall == MAT_REUSE_MATRIX){ 3371 mat=(Mat_SeqAIJ*)(*A_loc)->data; 3372 ci = mat->i; cj = mat->j; ca = mat->a; 3373 for (i=0; i<am; i++) { 3374 /* off-diagonal portion of A */ 3375 ncols_o = bi[i+1] - bi[i]; 3376 for (jo=0; jo<ncols_o; jo++) { 3377 col = cmap[*bj]; 3378 if (col >= cstart) break; 3379 *ca++ = *ba++; bj++; 3380 } 3381 /* diagonal portion of A */ 3382 ncols_d = ai[i+1] - ai[i]; 3383 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 3384 /* off-diagonal portion of A */ 3385 for (j=jo; j<ncols_o; j++) { 3386 *ca++ = *ba++; bj++; 3387 } 3388 } 3389 } else { 3390 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3391 } 3392 3393 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3394 PetscFunctionReturn(0); 3395 } 3396 3397 static PetscEvent logkey_getlocalmatcondensed = 0; 3398 #undef __FUNCT__ 3399 #define __FUNCT__ "MatGetLocalMatCondensed" 3400 /*@C 3401 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 3402 3403 Not Collective 3404 3405 Input Parameters: 3406 + A - the matrix 3407 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3408 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 3409 3410 Output Parameter: 3411 . A_loc - the local sequential matrix generated 3412 3413 Level: developer 3414 3415 @*/ 3416 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 3417 { 3418 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3419 PetscErrorCode ierr; 3420 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 3421 IS isrowa,iscola; 3422 Mat *aloc; 3423 3424 PetscFunctionBegin; 3425 if (!logkey_getlocalmatcondensed) { 3426 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE); 3427 } 3428 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3429 if (!row){ 3430 start = a->rstart; end = a->rend; 3431 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 3432 } else { 3433 isrowa = *row; 3434 } 3435 if (!col){ 3436 start = a->cstart; 3437 cmap = a->garray; 3438 nzA = a->A->n; 3439 nzB = a->B->n; 3440 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3441 ncols = 0; 3442 for (i=0; i<nzB; i++) { 3443 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3444 else break; 3445 } 3446 imark = i; 3447 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 3448 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 3449 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 3450 ierr = PetscFree(idx);CHKERRQ(ierr); 3451 } else { 3452 iscola = *col; 3453 } 3454 if (scall != MAT_INITIAL_MATRIX){ 3455 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 3456 aloc[0] = *A_loc; 3457 } 3458 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 3459 *A_loc = aloc[0]; 3460 ierr = PetscFree(aloc);CHKERRQ(ierr); 3461 if (!row){ 3462 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 3463 } 3464 if (!col){ 3465 ierr = ISDestroy(iscola);CHKERRQ(ierr); 3466 } 3467 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3468 PetscFunctionReturn(0); 3469 } 3470 3471 static PetscEvent logkey_GetBrowsOfAcols = 0; 3472 #undef __FUNCT__ 3473 #define __FUNCT__ "MatGetBrowsOfAcols" 3474 /*@C 3475 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 3476 3477 Collective on Mat 3478 3479 Input Parameters: 3480 + A,B - the matrices in mpiaij format 3481 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3482 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 3483 3484 Output Parameter: 3485 + rowb, colb - index sets of rows and columns of B to extract 3486 . brstart - row index of B_seq from which next B->m rows are taken from B's local rows 3487 - B_seq - the sequential matrix generated 3488 3489 Level: developer 3490 3491 @*/ 3492 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 3493 { 3494 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3495 PetscErrorCode ierr; 3496 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 3497 IS isrowb,iscolb; 3498 Mat *bseq; 3499 3500 PetscFunctionBegin; 3501 if (a->cstart != b->rstart || a->cend != b->rend){ 3502 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend); 3503 } 3504 if (!logkey_GetBrowsOfAcols) { 3505 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 3506 } 3507 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3508 3509 if (scall == MAT_INITIAL_MATRIX){ 3510 start = a->cstart; 3511 cmap = a->garray; 3512 nzA = a->A->n; 3513 nzB = a->B->n; 3514 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3515 ncols = 0; 3516 for (i=0; i<nzB; i++) { /* row < local row index */ 3517 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3518 else break; 3519 } 3520 imark = i; 3521 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 3522 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 3523 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 3524 ierr = PetscFree(idx);CHKERRQ(ierr); 3525 *brstart = imark; 3526 ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr); 3527 } else { 3528 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 3529 isrowb = *rowb; iscolb = *colb; 3530 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 3531 bseq[0] = *B_seq; 3532 } 3533 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 3534 *B_seq = bseq[0]; 3535 ierr = PetscFree(bseq);CHKERRQ(ierr); 3536 if (!rowb){ 3537 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 3538 } else { 3539 *rowb = isrowb; 3540 } 3541 if (!colb){ 3542 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 3543 } else { 3544 *colb = iscolb; 3545 } 3546 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3547 PetscFunctionReturn(0); 3548 } 3549 3550 static PetscEvent logkey_GetBrowsOfAocols = 0; 3551 #undef __FUNCT__ 3552 #define __FUNCT__ "MatGetBrowsOfAoCols" 3553 /*@C 3554 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 3555 of the OFF-DIAGONAL portion of local A 3556 3557 Collective on Mat 3558 3559 Input Parameters: 3560 + A,B - the matrices in mpiaij format 3561 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3562 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 3563 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 3564 3565 Output Parameter: 3566 + B_oth - the sequential matrix generated 3567 3568 Level: developer 3569 3570 @*/ 3571 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 3572 { 3573 VecScatter_MPI_General *gen_to,*gen_from; 3574 PetscErrorCode ierr; 3575 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3576 Mat_SeqAIJ *b_oth; 3577 VecScatter ctx=a->Mvctx; 3578 MPI_Comm comm=ctx->comm; 3579 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank; 3580 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj; 3581 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 3582 PetscInt i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 3583 MPI_Request *rwaits,*swaits; 3584 MPI_Status *sstatus,rstatus; 3585 PetscInt *cols; 3586 PetscScalar *vals; 3587 PetscMPIInt j; 3588 3589 PetscFunctionBegin; 3590 if (a->cstart != b->rstart || a->cend != b->rend){ 3591 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend); 3592 } 3593 if (!logkey_GetBrowsOfAocols) { 3594 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE); 3595 } 3596 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3597 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3598 3599 gen_to = (VecScatter_MPI_General*)ctx->todata; 3600 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 3601 rvalues = gen_from->values; /* holds the length of sending row */ 3602 svalues = gen_to->values; /* holds the length of receiving row */ 3603 nrecvs = gen_from->n; 3604 nsends = gen_to->n; 3605 rwaits = gen_from->requests; 3606 swaits = gen_to->requests; 3607 srow = gen_to->indices; /* local row index to be sent */ 3608 rstarts = gen_from->starts; 3609 sstarts = gen_to->starts; 3610 rprocs = gen_from->procs; 3611 sprocs = gen_to->procs; 3612 sstatus = gen_to->sstatus; 3613 3614 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 3615 if (scall == MAT_INITIAL_MATRIX){ 3616 /* i-array */ 3617 /*---------*/ 3618 /* post receives */ 3619 for (i=0; i<nrecvs; i++){ 3620 rowlen = (PetscInt*)rvalues + rstarts[i]; 3621 nrows = rstarts[i+1]-rstarts[i]; 3622 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3623 } 3624 3625 /* pack the outgoing message */ 3626 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 3627 rstartsj = sstartsj + nsends +1; 3628 sstartsj[0] = 0; rstartsj[0] = 0; 3629 len = 0; /* total length of j or a array to be sent */ 3630 k = 0; 3631 for (i=0; i<nsends; i++){ 3632 rowlen = (PetscInt*)svalues + sstarts[i]; 3633 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3634 for (j=0; j<nrows; j++) { 3635 row = srow[k] + b->rowners[rank]; /* global row idx */ 3636 ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 3637 len += rowlen[j]; 3638 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 3639 k++; 3640 } 3641 ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3642 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 3643 } 3644 /* recvs and sends of i-array are completed */ 3645 i = nrecvs; 3646 while (i--) { 3647 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3648 } 3649 if (nsends) { 3650 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr); 3651 } 3652 /* allocate buffers for sending j and a arrays */ 3653 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 3654 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 3655 3656 /* create i-array of B_oth */ 3657 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 3658 b_othi[0] = 0; 3659 len = 0; /* total length of j or a array to be received */ 3660 k = 0; 3661 for (i=0; i<nrecvs; i++){ 3662 rowlen = (PetscInt*)rvalues + rstarts[i]; 3663 nrows = rstarts[i+1]-rstarts[i]; 3664 for (j=0; j<nrows; j++) { 3665 b_othi[k+1] = b_othi[k] + rowlen[j]; 3666 len += rowlen[j]; k++; 3667 } 3668 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 3669 } 3670 3671 /* allocate space for j and a arrrays of B_oth */ 3672 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 3673 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr); 3674 3675 /* j-array */ 3676 /*---------*/ 3677 /* post receives of j-array */ 3678 for (i=0; i<nrecvs; i++){ 3679 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3680 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3681 } 3682 k = 0; 3683 for (i=0; i<nsends; i++){ 3684 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3685 bufJ = bufj+sstartsj[i]; 3686 for (j=0; j<nrows; j++) { 3687 row = srow[k++] + b->rowners[rank]; /* global row idx */ 3688 ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3689 for (l=0; l<ncols; l++){ 3690 *bufJ++ = cols[l]; 3691 } 3692 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3693 } 3694 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3695 } 3696 3697 /* recvs and sends of j-array are completed */ 3698 i = nrecvs; 3699 while (i--) { 3700 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3701 } 3702 if (nsends) { 3703 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr); 3704 } 3705 } else if (scall == MAT_REUSE_MATRIX){ 3706 sstartsj = *startsj; 3707 rstartsj = sstartsj + nsends +1; 3708 bufa = *bufa_ptr; 3709 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 3710 b_otha = b_oth->a; 3711 } else { 3712 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 3713 } 3714 3715 /* a-array */ 3716 /*---------*/ 3717 /* post receives of a-array */ 3718 for (i=0; i<nrecvs; i++){ 3719 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3720 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3721 } 3722 k = 0; 3723 for (i=0; i<nsends; i++){ 3724 nrows = sstarts[i+1]-sstarts[i]; 3725 bufA = bufa+sstartsj[i]; 3726 for (j=0; j<nrows; j++) { 3727 row = srow[k++] + b->rowners[rank]; /* global row idx */ 3728 ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3729 for (l=0; l<ncols; l++){ 3730 *bufA++ = vals[l]; 3731 } 3732 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3733 3734 } 3735 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3736 } 3737 /* recvs and sends of a-array are completed */ 3738 i = nrecvs; 3739 while (i--) { 3740 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3741 } 3742 if (nsends) { 3743 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr); 3744 } 3745 3746 if (scall == MAT_INITIAL_MATRIX){ 3747 /* put together the new matrix */ 3748 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 3749 3750 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3751 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3752 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 3753 b_oth->freedata = PETSC_TRUE; 3754 b_oth->nonew = 0; 3755 3756 ierr = PetscFree(bufj);CHKERRQ(ierr); 3757 if (!startsj || !bufa_ptr){ 3758 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 3759 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 3760 } else { 3761 *startsj = sstartsj; 3762 *bufa_ptr = bufa; 3763 } 3764 } 3765 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3766 3767 PetscFunctionReturn(0); 3768 } 3769 3770 /*MC 3771 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 3772 3773 Options Database Keys: 3774 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 3775 3776 Level: beginner 3777 3778 .seealso: MatCreateMPIAIJ 3779 M*/ 3780 3781 EXTERN_C_BEGIN 3782 #undef __FUNCT__ 3783 #define __FUNCT__ "MatCreate_MPIAIJ" 3784 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B) 3785 { 3786 Mat_MPIAIJ *b; 3787 PetscErrorCode ierr; 3788 PetscInt i; 3789 PetscMPIInt size; 3790 3791 PetscFunctionBegin; 3792 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 3793 3794 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 3795 B->data = (void*)b; 3796 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3797 B->factor = 0; 3798 B->bs = 1; 3799 B->assembled = PETSC_FALSE; 3800 B->mapping = 0; 3801 3802 B->insertmode = NOT_SET_VALUES; 3803 b->size = size; 3804 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 3805 3806 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 3807 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 3808 3809 /* the information in the maps duplicates the information computed below, eventually 3810 we should remove the duplicate information that is not contained in the maps */ 3811 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 3812 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 3813 3814 /* build local table of row and column ownerships */ 3815 ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 3816 ierr = PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 3817 b->cowners = b->rowners + b->size + 2; 3818 ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3819 b->rowners[0] = 0; 3820 for (i=2; i<=b->size; i++) { 3821 b->rowners[i] += b->rowners[i-1]; 3822 } 3823 b->rstart = b->rowners[b->rank]; 3824 b->rend = b->rowners[b->rank+1]; 3825 ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3826 b->cowners[0] = 0; 3827 for (i=2; i<=b->size; i++) { 3828 b->cowners[i] += b->cowners[i-1]; 3829 } 3830 b->cstart = b->cowners[b->rank]; 3831 b->cend = b->cowners[b->rank+1]; 3832 3833 /* build cache for off array entries formed */ 3834 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 3835 b->donotstash = PETSC_FALSE; 3836 b->colmap = 0; 3837 b->garray = 0; 3838 b->roworiented = PETSC_TRUE; 3839 3840 /* stuff used for matrix vector multiply */ 3841 b->lvec = PETSC_NULL; 3842 b->Mvctx = PETSC_NULL; 3843 3844 /* stuff for MatGetRow() */ 3845 b->rowindices = 0; 3846 b->rowvalues = 0; 3847 b->getrowactive = PETSC_FALSE; 3848 3849 /* Explicitly create 2 MATSEQAIJ matrices. */ 3850 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 3851 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3852 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3853 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 3854 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3855 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3856 3857 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3858 "MatStoreValues_MPIAIJ", 3859 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 3860 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3861 "MatRetrieveValues_MPIAIJ", 3862 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 3863 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3864 "MatGetDiagonalBlock_MPIAIJ", 3865 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 3866 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3867 "MatIsTranspose_MPIAIJ", 3868 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 3869 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 3870 "MatMPIAIJSetPreallocation_MPIAIJ", 3871 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 3872 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 3873 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 3874 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 3875 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3876 "MatDiagonalScaleLocal_MPIAIJ", 3877 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 3878 PetscFunctionReturn(0); 3879 } 3880 EXTERN_C_END 3881