1 2 #include "src/mat/impls/aij/mpi/mpiaij.h" 3 #include "src/inline/spops.h" 4 5 /* 6 Local utility routine that creates a mapping from the global column 7 number to the local number in the off-diagonal part of the local 8 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 9 a slightly higher hash table cost; without it it is not scalable (each processor 10 has an order N integer array but is fast to acess. 11 */ 12 #undef __FUNCT__ 13 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 14 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 15 { 16 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 17 PetscErrorCode ierr; 18 PetscInt n = aij->B->n,i; 19 20 PetscFunctionBegin; 21 #if defined (PETSC_USE_CTABLE) 22 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 23 for (i=0; i<n; i++){ 24 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 25 } 26 #else 27 ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 28 ierr = PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));CHKERRQ(ierr); 29 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr); 30 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 31 #endif 32 PetscFunctionReturn(0); 33 } 34 35 #define CHUNKSIZE 15 36 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 37 { \ 38 if (lastcol1 > col) low1 = 0; else high1 = nrow1; \ 39 lastcol1 = col;\ 40 while (high1-low1 > 5) { \ 41 t = (low1+high1)/2; \ 42 if (rp1[t] > col) high1 = t; \ 43 else low1 = t; \ 44 } \ 45 for (_i=low1; _i<high1; _i++) { \ 46 if (rp1[_i] > col) break; \ 47 if (rp1[_i] == col) { \ 48 if (addv == ADD_VALUES) ap1[_i] += value; \ 49 else ap1[_i] = value; \ 50 goto a_noinsert; \ 51 } \ 52 } \ 53 if (value == 0.0 && ignorezeroentries) goto a_noinsert; \ 54 if (nonew == 1) goto a_noinsert; \ 55 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 56 if (nrow1 >= rmax1) { \ 57 /* there is no extra room in row, therefore enlarge */ \ 58 PetscInt new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \ 59 PetscScalar *new_a; \ 60 \ 61 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \ 62 \ 63 /* malloc new storage space */ \ 64 len = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(am+1)*sizeof(PetscInt); \ 65 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 66 new_j = (PetscInt*)(new_a + new_nz); \ 67 new_i = new_j + new_nz; \ 68 \ 69 /* copy over old data into new slots */ \ 70 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \ 71 for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 72 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow1)*sizeof(PetscInt));CHKERRQ(ierr); \ 73 len = (new_nz - CHUNKSIZE - ai[row] - nrow1); \ 74 ierr = PetscMemcpy(new_j+ai[row]+nrow1+CHUNKSIZE,aj+ai[row]+nrow1,len*sizeof(PetscInt));CHKERRQ(ierr); \ 75 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow1)*sizeof(PetscScalar));CHKERRQ(ierr); \ 76 ierr = PetscMemcpy(new_a+ai[row]+nrow1+CHUNKSIZE,aa+ai[row]+nrow1,len*sizeof(PetscScalar));CHKERRQ(ierr); \ 77 /* free up old matrix storage */ \ 78 \ 79 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 80 if (!a->singlemalloc) { \ 81 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 82 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 83 } \ 84 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 85 a->singlemalloc = PETSC_TRUE; \ 86 \ 87 rp1 = aj + ai[row]; ap1 = aa + ai[row]; \ 88 rmax1 = aimax[row] = aimax[row] + CHUNKSIZE; \ 89 ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar)));CHKERRQ(ierr); \ 90 a->maxnz += CHUNKSIZE; \ 91 a->reallocs++; \ 92 } \ 93 N = nrow1++ - 1; a->nz++; \ 94 /* shift up all the later entries in this row */ \ 95 for (ii=N; ii>=_i; ii--) { \ 96 rp1[ii+1] = rp1[ii]; \ 97 ap1[ii+1] = ap1[ii]; \ 98 } \ 99 rp1[_i] = col; \ 100 ap1[_i] = value; \ 101 a_noinsert: ; \ 102 ailen[row] = nrow1; \ 103 } 104 105 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 106 { \ 107 if (lastcol2 > col) low2 = 0; else high2 = nrow2; \ 108 lastcol2 = col;\ 109 while (high2-low2 > 5) { \ 110 t = (low2+high2)/2; \ 111 if (rp2[t] > col) high2 = t; \ 112 else low2 = t; \ 113 } \ 114 for (_i=low2; _i<high2; _i++) { \ 115 if (rp2[_i] > col) break; \ 116 if (rp2[_i] == col) { \ 117 if (addv == ADD_VALUES) ap2[_i] += value; \ 118 else ap2[_i] = value; \ 119 goto b_noinsert; \ 120 } \ 121 } \ 122 if (value == 0.0 && ignorezeroentries) goto b_noinsert; \ 123 if (nonew == 1) goto b_noinsert; \ 124 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 125 if (nrow2 >= rmax2) { \ 126 /* there is no extra room in row, therefore enlarge */ \ 127 PetscInt new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 128 PetscScalar *new_a; \ 129 \ 130 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \ 131 \ 132 /* malloc new storage space */ \ 133 len = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(bm+1)*sizeof(PetscInt); \ 134 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 135 new_j = (PetscInt*)(new_a + new_nz); \ 136 new_i = new_j + new_nz; \ 137 \ 138 /* copy over old data into new slots */ \ 139 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 140 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 141 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow2)*sizeof(PetscInt));CHKERRQ(ierr); \ 142 len = (new_nz - CHUNKSIZE - bi[row] - nrow2); \ 143 ierr = PetscMemcpy(new_j+bi[row]+nrow2+CHUNKSIZE,bj+bi[row]+nrow2,len*sizeof(PetscInt));CHKERRQ(ierr); \ 144 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow2)*sizeof(PetscScalar));CHKERRQ(ierr); \ 145 ierr = PetscMemcpy(new_a+bi[row]+nrow2+CHUNKSIZE,ba+bi[row]+nrow2,len*sizeof(PetscScalar));CHKERRQ(ierr); \ 146 /* free up old matrix storage */ \ 147 \ 148 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 149 if (!b->singlemalloc) { \ 150 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 151 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 152 } \ 153 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 154 b->singlemalloc = PETSC_TRUE; \ 155 \ 156 rp2 = bj + bi[row]; ap2 = ba + bi[row]; \ 157 rmax2 = bimax[row] = bimax[row] + CHUNKSIZE; \ 158 ierr = PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar)));CHKERRQ(ierr); \ 159 b->maxnz += CHUNKSIZE; \ 160 b->reallocs++; \ 161 } \ 162 N = nrow2++ - 1; b->nz++; \ 163 /* shift up all the later entries in this row */ \ 164 for (ii=N; ii>=_i; ii--) { \ 165 rp2[ii+1] = rp2[ii]; \ 166 ap2[ii+1] = ap2[ii]; \ 167 } \ 168 rp2[_i] = col; \ 169 ap2[_i] = value; \ 170 b_noinsert: ; \ 171 bilen[row] = nrow2; \ 172 } 173 174 #undef __FUNCT__ 175 #define __FUNCT__ "MatSetValues_MPIAIJ" 176 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 177 { 178 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 179 PetscScalar value; 180 PetscErrorCode ierr; 181 PetscInt i,j,rstart = aij->rstart,rend = aij->rend; 182 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 183 PetscTruth roworiented = aij->roworiented; 184 185 /* Some Variables required in the macro */ 186 Mat A = aij->A; 187 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 188 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 189 PetscScalar *aa = a->a; 190 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 191 Mat B = aij->B; 192 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 193 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 194 PetscScalar *ba = b->a; 195 196 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 197 PetscInt nonew = a->nonew; 198 PetscScalar *ap1,*ap2; 199 200 PetscFunctionBegin; 201 for (i=0; i<m; i++) { 202 if (im[i] < 0) continue; 203 #if defined(PETSC_USE_DEBUG) 204 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 205 #endif 206 if (im[i] >= rstart && im[i] < rend) { 207 row = im[i] - rstart; 208 lastcol1 = -1; 209 rp1 = aj + ai[row]; 210 ap1 = aa + ai[row]; 211 rmax1 = aimax[row]; 212 nrow1 = ailen[row]; 213 low1 = 0; 214 high1 = nrow1; 215 lastcol2 = -1; 216 rp2 = bj + bi[row]; 217 ap2 = ba + bi[row]; \ 218 rmax2 = bimax[row]; 219 nrow2 = bilen[row]; \ 220 low2 = 0; 221 high2 = nrow2; 222 223 for (j=0; j<n; j++) { 224 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 225 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 226 if (in[j] >= cstart && in[j] < cend){ 227 col = in[j] - cstart; 228 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 229 } else if (in[j] < 0) continue; 230 #if defined(PETSC_USE_DEBUG) 231 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);} 232 #endif 233 else { 234 if (mat->was_assembled) { 235 if (!aij->colmap) { 236 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 237 } 238 #if defined (PETSC_USE_CTABLE) 239 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 240 col--; 241 #else 242 col = aij->colmap[in[j]] - 1; 243 #endif 244 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 245 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 246 col = in[j]; 247 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 248 B = aij->B; 249 b = (Mat_SeqAIJ*)B->data; 250 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 251 ba = b->a; 252 } 253 } else col = in[j]; 254 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 255 } 256 } 257 } else { 258 if (!aij->donotstash) { 259 if (roworiented) { 260 if (ignorezeroentries && v[i*n] == 0.0) continue; 261 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 262 } else { 263 if (ignorezeroentries && v[i] == 0.0) continue; 264 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 265 } 266 } 267 } 268 } 269 PetscFunctionReturn(0); 270 } 271 272 #undef __FUNCT__ 273 #define __FUNCT__ "MatGetValues_MPIAIJ" 274 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 275 { 276 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 277 PetscErrorCode ierr; 278 PetscInt i,j,rstart = aij->rstart,rend = aij->rend; 279 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 280 281 PetscFunctionBegin; 282 for (i=0; i<m; i++) { 283 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 284 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1); 285 if (idxm[i] >= rstart && idxm[i] < rend) { 286 row = idxm[i] - rstart; 287 for (j=0; j<n; j++) { 288 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 289 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1); 290 if (idxn[j] >= cstart && idxn[j] < cend){ 291 col = idxn[j] - cstart; 292 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 293 } else { 294 if (!aij->colmap) { 295 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 296 } 297 #if defined (PETSC_USE_CTABLE) 298 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 299 col --; 300 #else 301 col = aij->colmap[idxn[j]] - 1; 302 #endif 303 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 304 else { 305 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 306 } 307 } 308 } 309 } else { 310 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 311 } 312 } 313 PetscFunctionReturn(0); 314 } 315 316 #undef __FUNCT__ 317 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 318 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 319 { 320 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 321 PetscErrorCode ierr; 322 PetscInt nstash,reallocs; 323 InsertMode addv; 324 325 PetscFunctionBegin; 326 if (aij->donotstash) { 327 PetscFunctionReturn(0); 328 } 329 330 /* make sure all processors are either in INSERTMODE or ADDMODE */ 331 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 332 if (addv == (ADD_VALUES|INSERT_VALUES)) { 333 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 334 } 335 mat->insertmode = addv; /* in case this processor had no cache */ 336 337 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 338 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 339 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs); 340 PetscFunctionReturn(0); 341 } 342 343 #undef __FUNCT__ 344 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 345 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 346 { 347 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 348 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 349 PetscErrorCode ierr; 350 PetscMPIInt n; 351 PetscInt i,j,rstart,ncols,flg; 352 PetscInt *row,*col,other_disassembled; 353 PetscScalar *val; 354 InsertMode addv = mat->insertmode; 355 356 PetscFunctionBegin; 357 if (!aij->donotstash) { 358 while (1) { 359 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 360 if (!flg) break; 361 362 for (i=0; i<n;) { 363 /* Now identify the consecutive vals belonging to the same row */ 364 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 365 if (j < n) ncols = j-i; 366 else ncols = n-i; 367 /* Now assemble all these values with a single function call */ 368 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 369 i = j; 370 } 371 } 372 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 373 } 374 a->compressedrow.use = PETSC_FALSE; 375 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 376 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 377 378 /* determine if any processor has disassembled, if so we must 379 also disassemble ourselfs, in order that we may reassemble. */ 380 /* 381 if nonzero structure of submatrix B cannot change then we know that 382 no processor disassembled thus we can skip this stuff 383 */ 384 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 385 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 386 if (mat->was_assembled && !other_disassembled) { 387 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 388 /* reaccess the b because aij->B was changed */ 389 b = (Mat_SeqAIJ *)aij->B->data; 390 } 391 } 392 393 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 394 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 395 } 396 ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr); 397 b->compressedrow.use = PETSC_TRUE; 398 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 399 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 400 401 if (aij->rowvalues) { 402 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 403 aij->rowvalues = 0; 404 } 405 406 /* used by MatAXPY() */ 407 a->xtoy = 0; b->xtoy = 0; 408 a->XtoY = 0; b->XtoY = 0; 409 410 PetscFunctionReturn(0); 411 } 412 413 #undef __FUNCT__ 414 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 415 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 416 { 417 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 418 PetscErrorCode ierr; 419 420 PetscFunctionBegin; 421 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 422 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 423 PetscFunctionReturn(0); 424 } 425 426 #undef __FUNCT__ 427 #define __FUNCT__ "MatZeroRows_MPIAIJ" 428 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag) 429 { 430 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 431 PetscErrorCode ierr; 432 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1; 433 PetscInt i,N,*rows,*owners = l->rowners; 434 PetscInt *nprocs,j,idx,nsends,row; 435 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 436 PetscInt *rvalues,count,base,slen,*source; 437 PetscInt *lens,*lrows,*values,rstart=l->rstart; 438 MPI_Comm comm = A->comm; 439 MPI_Request *send_waits,*recv_waits; 440 MPI_Status recv_status,*send_status; 441 IS istmp; 442 #if defined(PETSC_DEBUG) 443 PetscTruth found = PETSC_FALSE; 444 #endif 445 446 PetscFunctionBegin; 447 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 448 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 449 450 /* first count number of contributors to each processor */ 451 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 452 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 453 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 454 j = 0; 455 for (i=0; i<N; i++) { 456 if (lastidx > (idx = rows[i])) j = 0; 457 lastidx = idx; 458 for (; j<size; j++) { 459 if (idx >= owners[j] && idx < owners[j+1]) { 460 nprocs[2*j]++; 461 nprocs[2*j+1] = 1; 462 owner[i] = j; 463 #if defined(PETSC_DEBUG) 464 found = PETSC_TRUE; 465 #endif 466 break; 467 } 468 } 469 #if defined(PETSC_DEBUG) 470 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 471 found = PETSC_FALSE; 472 #endif 473 } 474 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 475 476 /* inform other processors of number of messages and max length*/ 477 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 478 479 /* post receives: */ 480 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 481 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 482 for (i=0; i<nrecvs; i++) { 483 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 484 } 485 486 /* do sends: 487 1) starts[i] gives the starting index in svalues for stuff going to 488 the ith processor 489 */ 490 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 491 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 492 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 493 starts[0] = 0; 494 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 495 for (i=0; i<N; i++) { 496 svalues[starts[owner[i]]++] = rows[i]; 497 } 498 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 499 500 starts[0] = 0; 501 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 502 count = 0; 503 for (i=0; i<size; i++) { 504 if (nprocs[2*i+1]) { 505 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 506 } 507 } 508 ierr = PetscFree(starts);CHKERRQ(ierr); 509 510 base = owners[rank]; 511 512 /* wait on receives */ 513 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 514 source = lens + nrecvs; 515 count = nrecvs; slen = 0; 516 while (count) { 517 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 518 /* unpack receives into our local space */ 519 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 520 source[imdex] = recv_status.MPI_SOURCE; 521 lens[imdex] = n; 522 slen += n; 523 count--; 524 } 525 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 526 527 /* move the data into the send scatter */ 528 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 529 count = 0; 530 for (i=0; i<nrecvs; i++) { 531 values = rvalues + i*nmax; 532 for (j=0; j<lens[i]; j++) { 533 lrows[count++] = values[j] - base; 534 } 535 } 536 ierr = PetscFree(rvalues);CHKERRQ(ierr); 537 ierr = PetscFree(lens);CHKERRQ(ierr); 538 ierr = PetscFree(owner);CHKERRQ(ierr); 539 ierr = PetscFree(nprocs);CHKERRQ(ierr); 540 541 /* actually zap the local rows */ 542 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 543 ierr = PetscLogObjectParent(A,istmp);CHKERRQ(ierr); 544 545 /* 546 Zero the required rows. If the "diagonal block" of the matrix 547 is square and the user wishes to set the diagonal we use seperate 548 code so that MatSetValues() is not called for each diagonal allocating 549 new memory, thus calling lots of mallocs and slowing things down. 550 551 Contributed by: Mathew Knepley 552 */ 553 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 554 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 555 if (diag && (l->A->M == l->A->N)) { 556 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 557 } else if (diag) { 558 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 559 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 560 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 561 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 562 } 563 for (i = 0; i < slen; i++) { 564 row = lrows[i] + rstart; 565 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 566 } 567 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 568 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 569 } else { 570 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 571 } 572 ierr = ISDestroy(istmp);CHKERRQ(ierr); 573 ierr = PetscFree(lrows);CHKERRQ(ierr); 574 575 /* wait on sends */ 576 if (nsends) { 577 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 578 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 579 ierr = PetscFree(send_status);CHKERRQ(ierr); 580 } 581 ierr = PetscFree(send_waits);CHKERRQ(ierr); 582 ierr = PetscFree(svalues);CHKERRQ(ierr); 583 584 PetscFunctionReturn(0); 585 } 586 587 #undef __FUNCT__ 588 #define __FUNCT__ "MatMult_MPIAIJ" 589 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 590 { 591 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 592 PetscErrorCode ierr; 593 PetscInt nt; 594 595 PetscFunctionBegin; 596 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 597 if (nt != A->n) { 598 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt); 599 } 600 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 601 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 602 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 603 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 604 PetscFunctionReturn(0); 605 } 606 607 #undef __FUNCT__ 608 #define __FUNCT__ "MatMultAdd_MPIAIJ" 609 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 610 { 611 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 612 PetscErrorCode ierr; 613 614 PetscFunctionBegin; 615 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 616 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 617 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 618 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 619 PetscFunctionReturn(0); 620 } 621 622 #undef __FUNCT__ 623 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 624 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 625 { 626 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 627 PetscErrorCode ierr; 628 PetscTruth merged; 629 630 PetscFunctionBegin; 631 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 632 /* do nondiagonal part */ 633 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 634 if (!merged) { 635 /* send it on its way */ 636 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 637 /* do local part */ 638 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 639 /* receive remote parts: note this assumes the values are not actually */ 640 /* added in yy until the next line, */ 641 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 642 } else { 643 /* do local part */ 644 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 645 /* send it on its way */ 646 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 647 /* values actually were received in the Begin() but we need to call this nop */ 648 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 649 } 650 PetscFunctionReturn(0); 651 } 652 653 EXTERN_C_BEGIN 654 #undef __FUNCT__ 655 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 656 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth tol,PetscTruth *f) 657 { 658 MPI_Comm comm; 659 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 660 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 661 IS Me,Notme; 662 PetscErrorCode ierr; 663 PetscInt M,N,first,last,*notme,i; 664 PetscMPIInt size; 665 666 PetscFunctionBegin; 667 668 /* Easy test: symmetric diagonal block */ 669 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 670 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 671 if (!*f) PetscFunctionReturn(0); 672 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 673 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 674 if (size == 1) PetscFunctionReturn(0); 675 676 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 677 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 678 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 679 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 680 for (i=0; i<first; i++) notme[i] = i; 681 for (i=last; i<M; i++) notme[i-last+first] = i; 682 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 683 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 684 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 685 Aoff = Aoffs[0]; 686 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 687 Boff = Boffs[0]; 688 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 689 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 690 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 691 ierr = ISDestroy(Me);CHKERRQ(ierr); 692 ierr = ISDestroy(Notme);CHKERRQ(ierr); 693 694 PetscFunctionReturn(0); 695 } 696 EXTERN_C_END 697 698 #undef __FUNCT__ 699 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 700 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 701 { 702 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 703 PetscErrorCode ierr; 704 705 PetscFunctionBegin; 706 /* do nondiagonal part */ 707 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 708 /* send it on its way */ 709 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 710 /* do local part */ 711 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 712 /* receive remote parts */ 713 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 714 PetscFunctionReturn(0); 715 } 716 717 /* 718 This only works correctly for square matrices where the subblock A->A is the 719 diagonal block 720 */ 721 #undef __FUNCT__ 722 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 723 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 724 { 725 PetscErrorCode ierr; 726 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 727 728 PetscFunctionBegin; 729 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 730 if (a->rstart != a->cstart || a->rend != a->cend) { 731 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 732 } 733 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 734 PetscFunctionReturn(0); 735 } 736 737 #undef __FUNCT__ 738 #define __FUNCT__ "MatScale_MPIAIJ" 739 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A) 740 { 741 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 742 PetscErrorCode ierr; 743 744 PetscFunctionBegin; 745 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 746 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 747 PetscFunctionReturn(0); 748 } 749 750 #undef __FUNCT__ 751 #define __FUNCT__ "MatDestroy_MPIAIJ" 752 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 753 { 754 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 755 PetscErrorCode ierr; 756 757 PetscFunctionBegin; 758 #if defined(PETSC_USE_LOG) 759 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N); 760 #endif 761 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 762 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 763 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 764 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 765 #if defined (PETSC_USE_CTABLE) 766 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 767 #else 768 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 769 #endif 770 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 771 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 772 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 773 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 774 ierr = PetscFree(aij);CHKERRQ(ierr); 775 776 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 777 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 778 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 779 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 780 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 781 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 782 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 783 PetscFunctionReturn(0); 784 } 785 786 #undef __FUNCT__ 787 #define __FUNCT__ "MatView_MPIAIJ_Binary" 788 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 789 { 790 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 791 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 792 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 793 PetscErrorCode ierr; 794 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 795 int fd; 796 PetscInt nz,header[4],*row_lengths,*range,rlen,i; 797 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 798 PetscScalar *column_values; 799 800 PetscFunctionBegin; 801 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 802 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 803 nz = A->nz + B->nz; 804 if (!rank) { 805 header[0] = MAT_FILE_COOKIE; 806 header[1] = mat->M; 807 header[2] = mat->N; 808 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 809 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 810 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 811 /* get largest number of rows any processor has */ 812 rlen = mat->m; 813 ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr); 814 for (i=1; i<size; i++) { 815 rlen = PetscMax(rlen,range[i+1] - range[i]); 816 } 817 } else { 818 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 819 rlen = mat->m; 820 } 821 822 /* load up the local row counts */ 823 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 824 for (i=0; i<mat->m; i++) { 825 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 826 } 827 828 /* store the row lengths to the file */ 829 if (!rank) { 830 MPI_Status status; 831 ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 832 for (i=1; i<size; i++) { 833 rlen = range[i+1] - range[i]; 834 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 835 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 836 } 837 } else { 838 ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 839 } 840 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 841 842 /* load up the local column indices */ 843 nzmax = nz; /* )th processor needs space a largest processor needs */ 844 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 845 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 846 cnt = 0; 847 for (i=0; i<mat->m; i++) { 848 for (j=B->i[i]; j<B->i[i+1]; j++) { 849 if ( (col = garray[B->j[j]]) > cstart) break; 850 column_indices[cnt++] = col; 851 } 852 for (k=A->i[i]; k<A->i[i+1]; k++) { 853 column_indices[cnt++] = A->j[k] + cstart; 854 } 855 for (; j<B->i[i+1]; j++) { 856 column_indices[cnt++] = garray[B->j[j]]; 857 } 858 } 859 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 860 861 /* store the column indices to the file */ 862 if (!rank) { 863 MPI_Status status; 864 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 865 for (i=1; i<size; i++) { 866 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 867 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 868 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 869 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 870 } 871 } else { 872 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 873 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 874 } 875 ierr = PetscFree(column_indices);CHKERRQ(ierr); 876 877 /* load up the local column values */ 878 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 879 cnt = 0; 880 for (i=0; i<mat->m; i++) { 881 for (j=B->i[i]; j<B->i[i+1]; j++) { 882 if ( garray[B->j[j]] > cstart) break; 883 column_values[cnt++] = B->a[j]; 884 } 885 for (k=A->i[i]; k<A->i[i+1]; k++) { 886 column_values[cnt++] = A->a[k]; 887 } 888 for (; j<B->i[i+1]; j++) { 889 column_values[cnt++] = B->a[j]; 890 } 891 } 892 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 893 894 /* store the column values to the file */ 895 if (!rank) { 896 MPI_Status status; 897 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 898 for (i=1; i<size; i++) { 899 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 900 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 901 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 902 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 903 } 904 } else { 905 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 906 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 907 } 908 ierr = PetscFree(column_values);CHKERRQ(ierr); 909 PetscFunctionReturn(0); 910 } 911 912 #undef __FUNCT__ 913 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 914 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 915 { 916 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 917 PetscErrorCode ierr; 918 PetscMPIInt rank = aij->rank,size = aij->size; 919 PetscTruth isdraw,iascii,isbinary; 920 PetscViewer sviewer; 921 PetscViewerFormat format; 922 923 PetscFunctionBegin; 924 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 925 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 926 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 927 if (iascii) { 928 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 929 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 930 MatInfo info; 931 PetscTruth inodes; 932 933 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 934 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 935 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 936 if (!inodes) { 937 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 938 rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 939 } else { 940 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 941 rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 942 } 943 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 944 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 945 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 946 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 947 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 948 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 949 PetscFunctionReturn(0); 950 } else if (format == PETSC_VIEWER_ASCII_INFO) { 951 PetscInt inodecount,inodelimit,*inodes; 952 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 953 if (inodes) { 954 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 955 } else { 956 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 957 } 958 PetscFunctionReturn(0); 959 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 960 PetscFunctionReturn(0); 961 } 962 } else if (isbinary) { 963 if (size == 1) { 964 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 965 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 966 } else { 967 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 968 } 969 PetscFunctionReturn(0); 970 } else if (isdraw) { 971 PetscDraw draw; 972 PetscTruth isnull; 973 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 974 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 975 } 976 977 if (size == 1) { 978 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 979 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 980 } else { 981 /* assemble the entire matrix onto first processor. */ 982 Mat A; 983 Mat_SeqAIJ *Aloc; 984 PetscInt M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 985 PetscScalar *a; 986 987 if (!rank) { 988 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 989 } else { 990 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 991 } 992 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 993 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 994 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 995 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 996 997 /* copy over the A part */ 998 Aloc = (Mat_SeqAIJ*)aij->A->data; 999 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1000 row = aij->rstart; 1001 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 1002 for (i=0; i<m; i++) { 1003 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1004 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1005 } 1006 aj = Aloc->j; 1007 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 1008 1009 /* copy over the B part */ 1010 Aloc = (Mat_SeqAIJ*)aij->B->data; 1011 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1012 row = aij->rstart; 1013 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1014 ct = cols; 1015 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1016 for (i=0; i<m; i++) { 1017 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1018 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1019 } 1020 ierr = PetscFree(ct);CHKERRQ(ierr); 1021 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1022 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1023 /* 1024 Everyone has to call to draw the matrix since the graphics waits are 1025 synchronized across all processors that share the PetscDraw object 1026 */ 1027 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1028 if (!rank) { 1029 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 1030 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1031 } 1032 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1033 ierr = MatDestroy(A);CHKERRQ(ierr); 1034 } 1035 PetscFunctionReturn(0); 1036 } 1037 1038 #undef __FUNCT__ 1039 #define __FUNCT__ "MatView_MPIAIJ" 1040 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1041 { 1042 PetscErrorCode ierr; 1043 PetscTruth iascii,isdraw,issocket,isbinary; 1044 1045 PetscFunctionBegin; 1046 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1047 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1048 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1049 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1050 if (iascii || isdraw || isbinary || issocket) { 1051 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1052 } else { 1053 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1054 } 1055 PetscFunctionReturn(0); 1056 } 1057 1058 1059 1060 #undef __FUNCT__ 1061 #define __FUNCT__ "MatRelax_MPIAIJ" 1062 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1063 { 1064 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1065 PetscErrorCode ierr; 1066 Vec bb1; 1067 PetscScalar mone=-1.0; 1068 1069 PetscFunctionBegin; 1070 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1071 1072 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1073 1074 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1075 if (flag & SOR_ZERO_INITIAL_GUESS) { 1076 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1077 its--; 1078 } 1079 1080 while (its--) { 1081 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1082 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1083 1084 /* update rhs: bb1 = bb - B*x */ 1085 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1086 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1087 1088 /* local sweep */ 1089 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1090 CHKERRQ(ierr); 1091 } 1092 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1093 if (flag & SOR_ZERO_INITIAL_GUESS) { 1094 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1095 its--; 1096 } 1097 while (its--) { 1098 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1099 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1100 1101 /* update rhs: bb1 = bb - B*x */ 1102 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1103 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1104 1105 /* local sweep */ 1106 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1107 CHKERRQ(ierr); 1108 } 1109 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1110 if (flag & SOR_ZERO_INITIAL_GUESS) { 1111 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1112 its--; 1113 } 1114 while (its--) { 1115 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1116 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1117 1118 /* update rhs: bb1 = bb - B*x */ 1119 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1120 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1121 1122 /* local sweep */ 1123 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1124 CHKERRQ(ierr); 1125 } 1126 } else { 1127 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1128 } 1129 1130 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1131 PetscFunctionReturn(0); 1132 } 1133 1134 #undef __FUNCT__ 1135 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1136 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1137 { 1138 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1139 Mat A = mat->A,B = mat->B; 1140 PetscErrorCode ierr; 1141 PetscReal isend[5],irecv[5]; 1142 1143 PetscFunctionBegin; 1144 info->block_size = 1.0; 1145 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1146 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1147 isend[3] = info->memory; isend[4] = info->mallocs; 1148 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1149 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1150 isend[3] += info->memory; isend[4] += info->mallocs; 1151 if (flag == MAT_LOCAL) { 1152 info->nz_used = isend[0]; 1153 info->nz_allocated = isend[1]; 1154 info->nz_unneeded = isend[2]; 1155 info->memory = isend[3]; 1156 info->mallocs = isend[4]; 1157 } else if (flag == MAT_GLOBAL_MAX) { 1158 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1159 info->nz_used = irecv[0]; 1160 info->nz_allocated = irecv[1]; 1161 info->nz_unneeded = irecv[2]; 1162 info->memory = irecv[3]; 1163 info->mallocs = irecv[4]; 1164 } else if (flag == MAT_GLOBAL_SUM) { 1165 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1166 info->nz_used = irecv[0]; 1167 info->nz_allocated = irecv[1]; 1168 info->nz_unneeded = irecv[2]; 1169 info->memory = irecv[3]; 1170 info->mallocs = irecv[4]; 1171 } 1172 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1173 info->fill_ratio_needed = 0; 1174 info->factor_mallocs = 0; 1175 info->rows_global = (double)matin->M; 1176 info->columns_global = (double)matin->N; 1177 info->rows_local = (double)matin->m; 1178 info->columns_local = (double)matin->N; 1179 1180 PetscFunctionReturn(0); 1181 } 1182 1183 #undef __FUNCT__ 1184 #define __FUNCT__ "MatSetOption_MPIAIJ" 1185 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op) 1186 { 1187 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1188 PetscErrorCode ierr; 1189 1190 PetscFunctionBegin; 1191 switch (op) { 1192 case MAT_NO_NEW_NONZERO_LOCATIONS: 1193 case MAT_YES_NEW_NONZERO_LOCATIONS: 1194 case MAT_COLUMNS_UNSORTED: 1195 case MAT_COLUMNS_SORTED: 1196 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1197 case MAT_KEEP_ZEROED_ROWS: 1198 case MAT_NEW_NONZERO_LOCATION_ERR: 1199 case MAT_USE_INODES: 1200 case MAT_DO_NOT_USE_INODES: 1201 case MAT_IGNORE_ZERO_ENTRIES: 1202 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1203 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1204 break; 1205 case MAT_ROW_ORIENTED: 1206 a->roworiented = PETSC_TRUE; 1207 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1208 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1209 break; 1210 case MAT_ROWS_SORTED: 1211 case MAT_ROWS_UNSORTED: 1212 case MAT_YES_NEW_DIAGONALS: 1213 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1214 break; 1215 case MAT_COLUMN_ORIENTED: 1216 a->roworiented = PETSC_FALSE; 1217 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1218 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1219 break; 1220 case MAT_IGNORE_OFF_PROC_ENTRIES: 1221 a->donotstash = PETSC_TRUE; 1222 break; 1223 case MAT_NO_NEW_DIAGONALS: 1224 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1225 case MAT_SYMMETRIC: 1226 case MAT_STRUCTURALLY_SYMMETRIC: 1227 case MAT_HERMITIAN: 1228 case MAT_SYMMETRY_ETERNAL: 1229 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1230 break; 1231 case MAT_NOT_SYMMETRIC: 1232 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1233 case MAT_NOT_HERMITIAN: 1234 case MAT_NOT_SYMMETRY_ETERNAL: 1235 break; 1236 default: 1237 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1238 } 1239 PetscFunctionReturn(0); 1240 } 1241 1242 #undef __FUNCT__ 1243 #define __FUNCT__ "MatGetRow_MPIAIJ" 1244 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1245 { 1246 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1247 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1248 PetscErrorCode ierr; 1249 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1250 PetscInt nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1251 PetscInt *cmap,*idx_p; 1252 1253 PetscFunctionBegin; 1254 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1255 mat->getrowactive = PETSC_TRUE; 1256 1257 if (!mat->rowvalues && (idx || v)) { 1258 /* 1259 allocate enough space to hold information from the longest row. 1260 */ 1261 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1262 PetscInt max = 1,tmp; 1263 for (i=0; i<matin->m; i++) { 1264 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1265 if (max < tmp) { max = tmp; } 1266 } 1267 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1268 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1269 } 1270 1271 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1272 lrow = row - rstart; 1273 1274 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1275 if (!v) {pvA = 0; pvB = 0;} 1276 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1277 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1278 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1279 nztot = nzA + nzB; 1280 1281 cmap = mat->garray; 1282 if (v || idx) { 1283 if (nztot) { 1284 /* Sort by increasing column numbers, assuming A and B already sorted */ 1285 PetscInt imark = -1; 1286 if (v) { 1287 *v = v_p = mat->rowvalues; 1288 for (i=0; i<nzB; i++) { 1289 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1290 else break; 1291 } 1292 imark = i; 1293 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1294 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1295 } 1296 if (idx) { 1297 *idx = idx_p = mat->rowindices; 1298 if (imark > -1) { 1299 for (i=0; i<imark; i++) { 1300 idx_p[i] = cmap[cworkB[i]]; 1301 } 1302 } else { 1303 for (i=0; i<nzB; i++) { 1304 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1305 else break; 1306 } 1307 imark = i; 1308 } 1309 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1310 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1311 } 1312 } else { 1313 if (idx) *idx = 0; 1314 if (v) *v = 0; 1315 } 1316 } 1317 *nz = nztot; 1318 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1319 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1320 PetscFunctionReturn(0); 1321 } 1322 1323 #undef __FUNCT__ 1324 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1325 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1326 { 1327 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1328 1329 PetscFunctionBegin; 1330 if (!aij->getrowactive) { 1331 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1332 } 1333 aij->getrowactive = PETSC_FALSE; 1334 PetscFunctionReturn(0); 1335 } 1336 1337 #undef __FUNCT__ 1338 #define __FUNCT__ "MatNorm_MPIAIJ" 1339 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1340 { 1341 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1342 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1343 PetscErrorCode ierr; 1344 PetscInt i,j,cstart = aij->cstart; 1345 PetscReal sum = 0.0; 1346 PetscScalar *v; 1347 1348 PetscFunctionBegin; 1349 if (aij->size == 1) { 1350 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1351 } else { 1352 if (type == NORM_FROBENIUS) { 1353 v = amat->a; 1354 for (i=0; i<amat->nz; i++) { 1355 #if defined(PETSC_USE_COMPLEX) 1356 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1357 #else 1358 sum += (*v)*(*v); v++; 1359 #endif 1360 } 1361 v = bmat->a; 1362 for (i=0; i<bmat->nz; i++) { 1363 #if defined(PETSC_USE_COMPLEX) 1364 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1365 #else 1366 sum += (*v)*(*v); v++; 1367 #endif 1368 } 1369 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1370 *norm = sqrt(*norm); 1371 } else if (type == NORM_1) { /* max column norm */ 1372 PetscReal *tmp,*tmp2; 1373 PetscInt *jj,*garray = aij->garray; 1374 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1375 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1376 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1377 *norm = 0.0; 1378 v = amat->a; jj = amat->j; 1379 for (j=0; j<amat->nz; j++) { 1380 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1381 } 1382 v = bmat->a; jj = bmat->j; 1383 for (j=0; j<bmat->nz; j++) { 1384 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1385 } 1386 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1387 for (j=0; j<mat->N; j++) { 1388 if (tmp2[j] > *norm) *norm = tmp2[j]; 1389 } 1390 ierr = PetscFree(tmp);CHKERRQ(ierr); 1391 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1392 } else if (type == NORM_INFINITY) { /* max row norm */ 1393 PetscReal ntemp = 0.0; 1394 for (j=0; j<aij->A->m; j++) { 1395 v = amat->a + amat->i[j]; 1396 sum = 0.0; 1397 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1398 sum += PetscAbsScalar(*v); v++; 1399 } 1400 v = bmat->a + bmat->i[j]; 1401 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1402 sum += PetscAbsScalar(*v); v++; 1403 } 1404 if (sum > ntemp) ntemp = sum; 1405 } 1406 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1407 } else { 1408 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1409 } 1410 } 1411 PetscFunctionReturn(0); 1412 } 1413 1414 #undef __FUNCT__ 1415 #define __FUNCT__ "MatTranspose_MPIAIJ" 1416 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1417 { 1418 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1419 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1420 PetscErrorCode ierr; 1421 PetscInt M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1422 Mat B; 1423 PetscScalar *array; 1424 1425 PetscFunctionBegin; 1426 if (!matout && M != N) { 1427 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1428 } 1429 1430 ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr); 1431 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1432 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1433 1434 /* copy over the A part */ 1435 Aloc = (Mat_SeqAIJ*)a->A->data; 1436 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1437 row = a->rstart; 1438 for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;} 1439 for (i=0; i<m; i++) { 1440 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1441 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1442 } 1443 aj = Aloc->j; 1444 for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;} 1445 1446 /* copy over the B part */ 1447 Aloc = (Mat_SeqAIJ*)a->B->data; 1448 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1449 row = a->rstart; 1450 ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1451 ct = cols; 1452 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1453 for (i=0; i<m; i++) { 1454 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1455 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1456 } 1457 ierr = PetscFree(ct);CHKERRQ(ierr); 1458 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1459 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1460 if (matout) { 1461 *matout = B; 1462 } else { 1463 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1464 } 1465 PetscFunctionReturn(0); 1466 } 1467 1468 #undef __FUNCT__ 1469 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1470 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1471 { 1472 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1473 Mat a = aij->A,b = aij->B; 1474 PetscErrorCode ierr; 1475 PetscInt s1,s2,s3; 1476 1477 PetscFunctionBegin; 1478 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1479 if (rr) { 1480 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1481 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1482 /* Overlap communication with computation. */ 1483 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1484 } 1485 if (ll) { 1486 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1487 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1488 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1489 } 1490 /* scale the diagonal block */ 1491 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1492 1493 if (rr) { 1494 /* Do a scatter end and then right scale the off-diagonal block */ 1495 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1496 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1497 } 1498 1499 PetscFunctionReturn(0); 1500 } 1501 1502 1503 #undef __FUNCT__ 1504 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1505 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A) 1506 { 1507 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1508 PetscErrorCode ierr; 1509 1510 PetscFunctionBegin; 1511 if (!a->rank) { 1512 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1513 } 1514 PetscFunctionReturn(0); 1515 } 1516 1517 #undef __FUNCT__ 1518 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1519 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1520 { 1521 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1522 PetscErrorCode ierr; 1523 1524 PetscFunctionBegin; 1525 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1526 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1527 PetscFunctionReturn(0); 1528 } 1529 #undef __FUNCT__ 1530 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1531 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1532 { 1533 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1534 PetscErrorCode ierr; 1535 1536 PetscFunctionBegin; 1537 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1538 PetscFunctionReturn(0); 1539 } 1540 1541 #undef __FUNCT__ 1542 #define __FUNCT__ "MatEqual_MPIAIJ" 1543 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1544 { 1545 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1546 Mat a,b,c,d; 1547 PetscTruth flg; 1548 PetscErrorCode ierr; 1549 1550 PetscFunctionBegin; 1551 a = matA->A; b = matA->B; 1552 c = matB->A; d = matB->B; 1553 1554 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1555 if (flg) { 1556 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1557 } 1558 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1559 PetscFunctionReturn(0); 1560 } 1561 1562 #undef __FUNCT__ 1563 #define __FUNCT__ "MatCopy_MPIAIJ" 1564 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1565 { 1566 PetscErrorCode ierr; 1567 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1568 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1569 1570 PetscFunctionBegin; 1571 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1572 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1573 /* because of the column compression in the off-processor part of the matrix a->B, 1574 the number of columns in a->B and b->B may be different, hence we cannot call 1575 the MatCopy() directly on the two parts. If need be, we can provide a more 1576 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1577 then copying the submatrices */ 1578 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1579 } else { 1580 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1581 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1582 } 1583 PetscFunctionReturn(0); 1584 } 1585 1586 #undef __FUNCT__ 1587 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1588 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1589 { 1590 PetscErrorCode ierr; 1591 1592 PetscFunctionBegin; 1593 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1594 PetscFunctionReturn(0); 1595 } 1596 1597 #include "petscblaslapack.h" 1598 #undef __FUNCT__ 1599 #define __FUNCT__ "MatAXPY_MPIAIJ" 1600 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str) 1601 { 1602 PetscErrorCode ierr; 1603 PetscInt i; 1604 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1605 PetscBLASInt bnz,one=1; 1606 Mat_SeqAIJ *x,*y; 1607 1608 PetscFunctionBegin; 1609 if (str == SAME_NONZERO_PATTERN) { 1610 x = (Mat_SeqAIJ *)xx->A->data; 1611 y = (Mat_SeqAIJ *)yy->A->data; 1612 bnz = (PetscBLASInt)x->nz; 1613 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1614 x = (Mat_SeqAIJ *)xx->B->data; 1615 y = (Mat_SeqAIJ *)yy->B->data; 1616 bnz = (PetscBLASInt)x->nz; 1617 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1618 } else if (str == SUBSET_NONZERO_PATTERN) { 1619 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1620 1621 x = (Mat_SeqAIJ *)xx->B->data; 1622 y = (Mat_SeqAIJ *)yy->B->data; 1623 if (y->xtoy && y->XtoY != xx->B) { 1624 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1625 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1626 } 1627 if (!y->xtoy) { /* get xtoy */ 1628 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1629 y->XtoY = xx->B; 1630 } 1631 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1632 } else { 1633 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1634 } 1635 PetscFunctionReturn(0); 1636 } 1637 1638 /* -------------------------------------------------------------------*/ 1639 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1640 MatGetRow_MPIAIJ, 1641 MatRestoreRow_MPIAIJ, 1642 MatMult_MPIAIJ, 1643 /* 4*/ MatMultAdd_MPIAIJ, 1644 MatMultTranspose_MPIAIJ, 1645 MatMultTransposeAdd_MPIAIJ, 1646 0, 1647 0, 1648 0, 1649 /*10*/ 0, 1650 0, 1651 0, 1652 MatRelax_MPIAIJ, 1653 MatTranspose_MPIAIJ, 1654 /*15*/ MatGetInfo_MPIAIJ, 1655 MatEqual_MPIAIJ, 1656 MatGetDiagonal_MPIAIJ, 1657 MatDiagonalScale_MPIAIJ, 1658 MatNorm_MPIAIJ, 1659 /*20*/ MatAssemblyBegin_MPIAIJ, 1660 MatAssemblyEnd_MPIAIJ, 1661 0, 1662 MatSetOption_MPIAIJ, 1663 MatZeroEntries_MPIAIJ, 1664 /*25*/ MatZeroRows_MPIAIJ, 1665 0, 1666 0, 1667 0, 1668 0, 1669 /*30*/ MatSetUpPreallocation_MPIAIJ, 1670 0, 1671 0, 1672 0, 1673 0, 1674 /*35*/ MatDuplicate_MPIAIJ, 1675 0, 1676 0, 1677 0, 1678 0, 1679 /*40*/ MatAXPY_MPIAIJ, 1680 MatGetSubMatrices_MPIAIJ, 1681 MatIncreaseOverlap_MPIAIJ, 1682 MatGetValues_MPIAIJ, 1683 MatCopy_MPIAIJ, 1684 /*45*/ MatPrintHelp_MPIAIJ, 1685 MatScale_MPIAIJ, 1686 0, 1687 0, 1688 0, 1689 /*50*/ MatSetBlockSize_MPIAIJ, 1690 0, 1691 0, 1692 0, 1693 0, 1694 /*55*/ MatFDColoringCreate_MPIAIJ, 1695 0, 1696 MatSetUnfactored_MPIAIJ, 1697 0, 1698 0, 1699 /*60*/ MatGetSubMatrix_MPIAIJ, 1700 MatDestroy_MPIAIJ, 1701 MatView_MPIAIJ, 1702 MatGetPetscMaps_Petsc, 1703 0, 1704 /*65*/ 0, 1705 0, 1706 0, 1707 0, 1708 0, 1709 /*70*/ 0, 1710 0, 1711 MatSetColoring_MPIAIJ, 1712 #if defined(PETSC_HAVE_ADIC) 1713 MatSetValuesAdic_MPIAIJ, 1714 #else 1715 0, 1716 #endif 1717 MatSetValuesAdifor_MPIAIJ, 1718 /*75*/ 0, 1719 0, 1720 0, 1721 0, 1722 0, 1723 /*80*/ 0, 1724 0, 1725 0, 1726 0, 1727 /*84*/ MatLoad_MPIAIJ, 1728 0, 1729 0, 1730 0, 1731 0, 1732 0, 1733 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1734 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1735 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1736 MatPtAP_Basic, 1737 MatPtAPSymbolic_MPIAIJ, 1738 /*95*/ MatPtAPNumeric_MPIAIJ, 1739 0, 1740 0, 1741 0, 1742 0, 1743 /*100*/0, 1744 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1745 MatPtAPNumeric_MPIAIJ_MPIAIJ, 1746 }; 1747 1748 /* ----------------------------------------------------------------------------------------*/ 1749 1750 EXTERN_C_BEGIN 1751 #undef __FUNCT__ 1752 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1753 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 1754 { 1755 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1756 PetscErrorCode ierr; 1757 1758 PetscFunctionBegin; 1759 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1760 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1761 PetscFunctionReturn(0); 1762 } 1763 EXTERN_C_END 1764 1765 EXTERN_C_BEGIN 1766 #undef __FUNCT__ 1767 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1768 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 1769 { 1770 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1771 PetscErrorCode ierr; 1772 1773 PetscFunctionBegin; 1774 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1775 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1776 PetscFunctionReturn(0); 1777 } 1778 EXTERN_C_END 1779 1780 #include "petscpc.h" 1781 EXTERN_C_BEGIN 1782 #undef __FUNCT__ 1783 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1784 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1785 { 1786 Mat_MPIAIJ *b; 1787 PetscErrorCode ierr; 1788 PetscInt i; 1789 1790 PetscFunctionBegin; 1791 B->preallocated = PETSC_TRUE; 1792 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1793 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1794 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1795 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1796 if (d_nnz) { 1797 for (i=0; i<B->m; i++) { 1798 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1799 } 1800 } 1801 if (o_nnz) { 1802 for (i=0; i<B->m; i++) { 1803 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1804 } 1805 } 1806 b = (Mat_MPIAIJ*)B->data; 1807 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1808 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1809 1810 PetscFunctionReturn(0); 1811 } 1812 EXTERN_C_END 1813 1814 #undef __FUNCT__ 1815 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1816 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1817 { 1818 Mat mat; 1819 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1820 PetscErrorCode ierr; 1821 1822 PetscFunctionBegin; 1823 *newmat = 0; 1824 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1825 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1826 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1827 a = (Mat_MPIAIJ*)mat->data; 1828 1829 mat->factor = matin->factor; 1830 mat->bs = matin->bs; 1831 mat->assembled = PETSC_TRUE; 1832 mat->insertmode = NOT_SET_VALUES; 1833 mat->preallocated = PETSC_TRUE; 1834 1835 a->rstart = oldmat->rstart; 1836 a->rend = oldmat->rend; 1837 a->cstart = oldmat->cstart; 1838 a->cend = oldmat->cend; 1839 a->size = oldmat->size; 1840 a->rank = oldmat->rank; 1841 a->donotstash = oldmat->donotstash; 1842 a->roworiented = oldmat->roworiented; 1843 a->rowindices = 0; 1844 a->rowvalues = 0; 1845 a->getrowactive = PETSC_FALSE; 1846 1847 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 1848 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1849 if (oldmat->colmap) { 1850 #if defined (PETSC_USE_CTABLE) 1851 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1852 #else 1853 ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1854 ierr = PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1855 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1856 #endif 1857 } else a->colmap = 0; 1858 if (oldmat->garray) { 1859 PetscInt len; 1860 len = oldmat->B->n; 1861 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1862 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 1863 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 1864 } else a->garray = 0; 1865 1866 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1867 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 1868 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1869 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 1870 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1871 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1872 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1873 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1874 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1875 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 1876 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1877 *newmat = mat; 1878 PetscFunctionReturn(0); 1879 } 1880 1881 #include "petscsys.h" 1882 1883 #undef __FUNCT__ 1884 #define __FUNCT__ "MatLoad_MPIAIJ" 1885 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 1886 { 1887 Mat A; 1888 PetscScalar *vals,*svals; 1889 MPI_Comm comm = ((PetscObject)viewer)->comm; 1890 MPI_Status status; 1891 PetscErrorCode ierr; 1892 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 1893 PetscInt i,nz,j,rstart,rend,mmax; 1894 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 1895 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1896 PetscInt cend,cstart,n,*rowners; 1897 int fd; 1898 1899 PetscFunctionBegin; 1900 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1901 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1902 if (!rank) { 1903 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1904 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1905 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1906 } 1907 1908 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1909 M = header[1]; N = header[2]; 1910 /* determine ownership of all rows */ 1911 m = M/size + ((M % size) > rank); 1912 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1913 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1914 1915 /* First process needs enough room for process with most rows */ 1916 if (!rank) { 1917 mmax = rowners[1]; 1918 for (i=2; i<size; i++) { 1919 mmax = PetscMax(mmax,rowners[i]); 1920 } 1921 } else mmax = m; 1922 1923 rowners[0] = 0; 1924 for (i=2; i<=size; i++) { 1925 mmax = PetscMax(mmax,rowners[i]); 1926 rowners[i] += rowners[i-1]; 1927 } 1928 rstart = rowners[rank]; 1929 rend = rowners[rank+1]; 1930 1931 /* distribute row lengths to all processors */ 1932 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 1933 if (!rank) { 1934 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 1935 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1936 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 1937 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1938 for (j=0; j<m; j++) { 1939 procsnz[0] += ourlens[j]; 1940 } 1941 for (i=1; i<size; i++) { 1942 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 1943 /* calculate the number of nonzeros on each processor */ 1944 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 1945 procsnz[i] += rowlengths[j]; 1946 } 1947 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1948 } 1949 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1950 } else { 1951 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1952 } 1953 1954 if (!rank) { 1955 /* determine max buffer needed and allocate it */ 1956 maxnz = 0; 1957 for (i=0; i<size; i++) { 1958 maxnz = PetscMax(maxnz,procsnz[i]); 1959 } 1960 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1961 1962 /* read in my part of the matrix column indices */ 1963 nz = procsnz[0]; 1964 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1965 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1966 1967 /* read in every one elses and ship off */ 1968 for (i=1; i<size; i++) { 1969 nz = procsnz[i]; 1970 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1971 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1972 } 1973 ierr = PetscFree(cols);CHKERRQ(ierr); 1974 } else { 1975 /* determine buffer space needed for message */ 1976 nz = 0; 1977 for (i=0; i<m; i++) { 1978 nz += ourlens[i]; 1979 } 1980 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1981 1982 /* receive message of column indices*/ 1983 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1984 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1985 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1986 } 1987 1988 /* determine column ownership if matrix is not square */ 1989 if (N != M) { 1990 n = N/size + ((N % size) > rank); 1991 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1992 cstart = cend - n; 1993 } else { 1994 cstart = rstart; 1995 cend = rend; 1996 n = cend - cstart; 1997 } 1998 1999 /* loop over local rows, determining number of off diagonal entries */ 2000 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2001 jj = 0; 2002 for (i=0; i<m; i++) { 2003 for (j=0; j<ourlens[i]; j++) { 2004 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2005 jj++; 2006 } 2007 } 2008 2009 /* create our matrix */ 2010 for (i=0; i<m; i++) { 2011 ourlens[i] -= offlens[i]; 2012 } 2013 ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr); 2014 ierr = MatSetType(A,type);CHKERRQ(ierr); 2015 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2016 2017 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2018 for (i=0; i<m; i++) { 2019 ourlens[i] += offlens[i]; 2020 } 2021 2022 if (!rank) { 2023 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2024 2025 /* read in my part of the matrix numerical values */ 2026 nz = procsnz[0]; 2027 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2028 2029 /* insert into matrix */ 2030 jj = rstart; 2031 smycols = mycols; 2032 svals = vals; 2033 for (i=0; i<m; i++) { 2034 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2035 smycols += ourlens[i]; 2036 svals += ourlens[i]; 2037 jj++; 2038 } 2039 2040 /* read in other processors and ship out */ 2041 for (i=1; i<size; i++) { 2042 nz = procsnz[i]; 2043 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2044 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2045 } 2046 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2047 } else { 2048 /* receive numeric values */ 2049 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2050 2051 /* receive message of values*/ 2052 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2053 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2054 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2055 2056 /* insert into matrix */ 2057 jj = rstart; 2058 smycols = mycols; 2059 svals = vals; 2060 for (i=0; i<m; i++) { 2061 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2062 smycols += ourlens[i]; 2063 svals += ourlens[i]; 2064 jj++; 2065 } 2066 } 2067 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2068 ierr = PetscFree(vals);CHKERRQ(ierr); 2069 ierr = PetscFree(mycols);CHKERRQ(ierr); 2070 ierr = PetscFree(rowners);CHKERRQ(ierr); 2071 2072 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2073 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2074 *newmat = A; 2075 PetscFunctionReturn(0); 2076 } 2077 2078 #undef __FUNCT__ 2079 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2080 /* 2081 Not great since it makes two copies of the submatrix, first an SeqAIJ 2082 in local and then by concatenating the local matrices the end result. 2083 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2084 */ 2085 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2086 { 2087 PetscErrorCode ierr; 2088 PetscMPIInt rank,size; 2089 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2090 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2091 Mat *local,M,Mreuse; 2092 PetscScalar *vwork,*aa; 2093 MPI_Comm comm = mat->comm; 2094 Mat_SeqAIJ *aij; 2095 2096 2097 PetscFunctionBegin; 2098 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2099 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2100 2101 if (call == MAT_REUSE_MATRIX) { 2102 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2103 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2104 local = &Mreuse; 2105 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2106 } else { 2107 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2108 Mreuse = *local; 2109 ierr = PetscFree(local);CHKERRQ(ierr); 2110 } 2111 2112 /* 2113 m - number of local rows 2114 n - number of columns (same on all processors) 2115 rstart - first row in new global matrix generated 2116 */ 2117 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2118 if (call == MAT_INITIAL_MATRIX) { 2119 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2120 ii = aij->i; 2121 jj = aij->j; 2122 2123 /* 2124 Determine the number of non-zeros in the diagonal and off-diagonal 2125 portions of the matrix in order to do correct preallocation 2126 */ 2127 2128 /* first get start and end of "diagonal" columns */ 2129 if (csize == PETSC_DECIDE) { 2130 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2131 if (mglobal == n) { /* square matrix */ 2132 nlocal = m; 2133 } else { 2134 nlocal = n/size + ((n % size) > rank); 2135 } 2136 } else { 2137 nlocal = csize; 2138 } 2139 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2140 rstart = rend - nlocal; 2141 if (rank == size - 1 && rend != n) { 2142 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2143 } 2144 2145 /* next, compute all the lengths */ 2146 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2147 olens = dlens + m; 2148 for (i=0; i<m; i++) { 2149 jend = ii[i+1] - ii[i]; 2150 olen = 0; 2151 dlen = 0; 2152 for (j=0; j<jend; j++) { 2153 if (*jj < rstart || *jj >= rend) olen++; 2154 else dlen++; 2155 jj++; 2156 } 2157 olens[i] = olen; 2158 dlens[i] = dlen; 2159 } 2160 ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr); 2161 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2162 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2163 ierr = PetscFree(dlens);CHKERRQ(ierr); 2164 } else { 2165 PetscInt ml,nl; 2166 2167 M = *newmat; 2168 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2169 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2170 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2171 /* 2172 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2173 rather than the slower MatSetValues(). 2174 */ 2175 M->was_assembled = PETSC_TRUE; 2176 M->assembled = PETSC_FALSE; 2177 } 2178 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2179 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2180 ii = aij->i; 2181 jj = aij->j; 2182 aa = aij->a; 2183 for (i=0; i<m; i++) { 2184 row = rstart + i; 2185 nz = ii[i+1] - ii[i]; 2186 cwork = jj; jj += nz; 2187 vwork = aa; aa += nz; 2188 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2189 } 2190 2191 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2192 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2193 *newmat = M; 2194 2195 /* save submatrix used in processor for next request */ 2196 if (call == MAT_INITIAL_MATRIX) { 2197 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2198 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2199 } 2200 2201 PetscFunctionReturn(0); 2202 } 2203 2204 EXTERN_C_BEGIN 2205 #undef __FUNCT__ 2206 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2207 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2208 { 2209 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2210 PetscInt m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d; 2211 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii; 2212 const PetscInt *JJ; 2213 PetscScalar *values; 2214 PetscErrorCode ierr; 2215 2216 PetscFunctionBegin; 2217 #if defined(PETSC_OPT_g) 2218 if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]); 2219 #endif 2220 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2221 o_nnz = d_nnz + m; 2222 2223 for (i=0; i<m; i++) { 2224 nnz = I[i+1]- I[i]; 2225 JJ = J + I[i]; 2226 nnz_max = PetscMax(nnz_max,nnz); 2227 #if defined(PETSC_OPT_g) 2228 if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 2229 #endif 2230 for (j=0; j<nnz; j++) { 2231 if (*JJ >= cstart) break; 2232 JJ++; 2233 } 2234 d = 0; 2235 for (; j<nnz; j++) { 2236 if (*JJ++ >= cend) break; 2237 d++; 2238 } 2239 d_nnz[i] = d; 2240 o_nnz[i] = nnz - d; 2241 } 2242 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2243 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2244 2245 if (v) values = (PetscScalar*)v; 2246 else { 2247 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2248 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2249 } 2250 2251 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2252 for (i=0; i<m; i++) { 2253 ii = i + rstart; 2254 nnz = I[i+1]- I[i]; 2255 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr); 2256 } 2257 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2258 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2259 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2260 2261 if (!v) { 2262 ierr = PetscFree(values);CHKERRQ(ierr); 2263 } 2264 PetscFunctionReturn(0); 2265 } 2266 EXTERN_C_END 2267 2268 #undef __FUNCT__ 2269 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2270 /*@C 2271 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2272 (the default parallel PETSc format). 2273 2274 Collective on MPI_Comm 2275 2276 Input Parameters: 2277 + A - the matrix 2278 . i - the indices into j for the start of each local row (starts with zero) 2279 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2280 - v - optional values in the matrix 2281 2282 Level: developer 2283 2284 .keywords: matrix, aij, compressed row, sparse, parallel 2285 2286 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2287 @*/ 2288 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2289 { 2290 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2291 2292 PetscFunctionBegin; 2293 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2294 if (f) { 2295 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2296 } 2297 PetscFunctionReturn(0); 2298 } 2299 2300 #undef __FUNCT__ 2301 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2302 /*@C 2303 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2304 (the default parallel PETSc format). For good matrix assembly performance 2305 the user should preallocate the matrix storage by setting the parameters 2306 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2307 performance can be increased by more than a factor of 50. 2308 2309 Collective on MPI_Comm 2310 2311 Input Parameters: 2312 + A - the matrix 2313 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2314 (same value is used for all local rows) 2315 . d_nnz - array containing the number of nonzeros in the various rows of the 2316 DIAGONAL portion of the local submatrix (possibly different for each row) 2317 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2318 The size of this array is equal to the number of local rows, i.e 'm'. 2319 You must leave room for the diagonal entry even if it is zero. 2320 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2321 submatrix (same value is used for all local rows). 2322 - o_nnz - array containing the number of nonzeros in the various rows of the 2323 OFF-DIAGONAL portion of the local submatrix (possibly different for 2324 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2325 structure. The size of this array is equal to the number 2326 of local rows, i.e 'm'. 2327 2328 If the *_nnz parameter is given then the *_nz parameter is ignored 2329 2330 The AIJ format (also called the Yale sparse matrix format or 2331 compressed row storage (CSR)), is fully compatible with standard Fortran 77 2332 storage. The stored row and column indices begin with zero. See the users manual for details. 2333 2334 The parallel matrix is partitioned such that the first m0 rows belong to 2335 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2336 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2337 2338 The DIAGONAL portion of the local submatrix of a processor can be defined 2339 as the submatrix which is obtained by extraction the part corresponding 2340 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2341 first row that belongs to the processor, and r2 is the last row belonging 2342 to the this processor. This is a square mxm matrix. The remaining portion 2343 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2344 2345 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2346 2347 Example usage: 2348 2349 Consider the following 8x8 matrix with 34 non-zero values, that is 2350 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2351 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2352 as follows: 2353 2354 .vb 2355 1 2 0 | 0 3 0 | 0 4 2356 Proc0 0 5 6 | 7 0 0 | 8 0 2357 9 0 10 | 11 0 0 | 12 0 2358 ------------------------------------- 2359 13 0 14 | 15 16 17 | 0 0 2360 Proc1 0 18 0 | 19 20 21 | 0 0 2361 0 0 0 | 22 23 0 | 24 0 2362 ------------------------------------- 2363 Proc2 25 26 27 | 0 0 28 | 29 0 2364 30 0 0 | 31 32 33 | 0 34 2365 .ve 2366 2367 This can be represented as a collection of submatrices as: 2368 2369 .vb 2370 A B C 2371 D E F 2372 G H I 2373 .ve 2374 2375 Where the submatrices A,B,C are owned by proc0, D,E,F are 2376 owned by proc1, G,H,I are owned by proc2. 2377 2378 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2379 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2380 The 'M','N' parameters are 8,8, and have the same values on all procs. 2381 2382 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2383 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2384 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2385 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2386 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2387 matrix, ans [DF] as another SeqAIJ matrix. 2388 2389 When d_nz, o_nz parameters are specified, d_nz storage elements are 2390 allocated for every row of the local diagonal submatrix, and o_nz 2391 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2392 One way to choose d_nz and o_nz is to use the max nonzerors per local 2393 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2394 In this case, the values of d_nz,o_nz are: 2395 .vb 2396 proc0 : dnz = 2, o_nz = 2 2397 proc1 : dnz = 3, o_nz = 2 2398 proc2 : dnz = 1, o_nz = 4 2399 .ve 2400 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2401 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2402 for proc3. i.e we are using 12+15+10=37 storage locations to store 2403 34 values. 2404 2405 When d_nnz, o_nnz parameters are specified, the storage is specified 2406 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2407 In the above case the values for d_nnz,o_nnz are: 2408 .vb 2409 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2410 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2411 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2412 .ve 2413 Here the space allocated is sum of all the above values i.e 34, and 2414 hence pre-allocation is perfect. 2415 2416 Level: intermediate 2417 2418 .keywords: matrix, aij, compressed row, sparse, parallel 2419 2420 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 2421 MPIAIJ 2422 @*/ 2423 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2424 { 2425 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2426 2427 PetscFunctionBegin; 2428 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2429 if (f) { 2430 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2431 } 2432 PetscFunctionReturn(0); 2433 } 2434 2435 #undef __FUNCT__ 2436 #define __FUNCT__ "MatCreateMPIAIJ" 2437 /*@C 2438 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2439 (the default parallel PETSc format). For good matrix assembly performance 2440 the user should preallocate the matrix storage by setting the parameters 2441 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2442 performance can be increased by more than a factor of 50. 2443 2444 Collective on MPI_Comm 2445 2446 Input Parameters: 2447 + comm - MPI communicator 2448 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2449 This value should be the same as the local size used in creating the 2450 y vector for the matrix-vector product y = Ax. 2451 . n - This value should be the same as the local size used in creating the 2452 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2453 calculated if N is given) For square matrices n is almost always m. 2454 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2455 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2456 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2457 (same value is used for all local rows) 2458 . d_nnz - array containing the number of nonzeros in the various rows of the 2459 DIAGONAL portion of the local submatrix (possibly different for each row) 2460 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2461 The size of this array is equal to the number of local rows, i.e 'm'. 2462 You must leave room for the diagonal entry even if it is zero. 2463 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2464 submatrix (same value is used for all local rows). 2465 - o_nnz - array containing the number of nonzeros in the various rows of the 2466 OFF-DIAGONAL portion of the local submatrix (possibly different for 2467 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2468 structure. The size of this array is equal to the number 2469 of local rows, i.e 'm'. 2470 2471 Output Parameter: 2472 . A - the matrix 2473 2474 Notes: 2475 If the *_nnz parameter is given then the *_nz parameter is ignored 2476 2477 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2478 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2479 storage requirements for this matrix. 2480 2481 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2482 processor than it must be used on all processors that share the object for 2483 that argument. 2484 2485 The user MUST specify either the local or global matrix dimensions 2486 (possibly both). 2487 2488 The parallel matrix is partitioned such that the first m0 rows belong to 2489 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2490 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2491 2492 The DIAGONAL portion of the local submatrix of a processor can be defined 2493 as the submatrix which is obtained by extraction the part corresponding 2494 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2495 first row that belongs to the processor, and r2 is the last row belonging 2496 to the this processor. This is a square mxm matrix. The remaining portion 2497 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2498 2499 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2500 2501 When calling this routine with a single process communicator, a matrix of 2502 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2503 type of communicator, use the construction mechanism: 2504 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 2505 2506 By default, this format uses inodes (identical nodes) when possible. 2507 We search for consecutive rows with the same nonzero structure, thereby 2508 reusing matrix information to achieve increased efficiency. 2509 2510 Options Database Keys: 2511 + -mat_no_inode - Do not use inodes 2512 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 2513 - -mat_aij_oneindex - Internally use indexing starting at 1 2514 rather than 0. Note that when calling MatSetValues(), 2515 the user still MUST index entries starting at 0! 2516 2517 2518 Example usage: 2519 2520 Consider the following 8x8 matrix with 34 non-zero values, that is 2521 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2522 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2523 as follows: 2524 2525 .vb 2526 1 2 0 | 0 3 0 | 0 4 2527 Proc0 0 5 6 | 7 0 0 | 8 0 2528 9 0 10 | 11 0 0 | 12 0 2529 ------------------------------------- 2530 13 0 14 | 15 16 17 | 0 0 2531 Proc1 0 18 0 | 19 20 21 | 0 0 2532 0 0 0 | 22 23 0 | 24 0 2533 ------------------------------------- 2534 Proc2 25 26 27 | 0 0 28 | 29 0 2535 30 0 0 | 31 32 33 | 0 34 2536 .ve 2537 2538 This can be represented as a collection of submatrices as: 2539 2540 .vb 2541 A B C 2542 D E F 2543 G H I 2544 .ve 2545 2546 Where the submatrices A,B,C are owned by proc0, D,E,F are 2547 owned by proc1, G,H,I are owned by proc2. 2548 2549 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2550 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2551 The 'M','N' parameters are 8,8, and have the same values on all procs. 2552 2553 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2554 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2555 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2556 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2557 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2558 matrix, ans [DF] as another SeqAIJ matrix. 2559 2560 When d_nz, o_nz parameters are specified, d_nz storage elements are 2561 allocated for every row of the local diagonal submatrix, and o_nz 2562 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2563 One way to choose d_nz and o_nz is to use the max nonzerors per local 2564 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2565 In this case, the values of d_nz,o_nz are: 2566 .vb 2567 proc0 : dnz = 2, o_nz = 2 2568 proc1 : dnz = 3, o_nz = 2 2569 proc2 : dnz = 1, o_nz = 4 2570 .ve 2571 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2572 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2573 for proc3. i.e we are using 12+15+10=37 storage locations to store 2574 34 values. 2575 2576 When d_nnz, o_nnz parameters are specified, the storage is specified 2577 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2578 In the above case the values for d_nnz,o_nnz are: 2579 .vb 2580 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2581 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2582 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2583 .ve 2584 Here the space allocated is sum of all the above values i.e 34, and 2585 hence pre-allocation is perfect. 2586 2587 Level: intermediate 2588 2589 .keywords: matrix, aij, compressed row, sparse, parallel 2590 2591 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2592 MPIAIJ 2593 @*/ 2594 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2595 { 2596 PetscErrorCode ierr; 2597 PetscMPIInt size; 2598 2599 PetscFunctionBegin; 2600 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2601 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2602 if (size > 1) { 2603 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2604 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2605 } else { 2606 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2607 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2608 } 2609 PetscFunctionReturn(0); 2610 } 2611 2612 #undef __FUNCT__ 2613 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2614 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2615 { 2616 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2617 2618 PetscFunctionBegin; 2619 *Ad = a->A; 2620 *Ao = a->B; 2621 *colmap = a->garray; 2622 PetscFunctionReturn(0); 2623 } 2624 2625 #undef __FUNCT__ 2626 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2627 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2628 { 2629 PetscErrorCode ierr; 2630 PetscInt i; 2631 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2632 2633 PetscFunctionBegin; 2634 if (coloring->ctype == IS_COLORING_LOCAL) { 2635 ISColoringValue *allcolors,*colors; 2636 ISColoring ocoloring; 2637 2638 /* set coloring for diagonal portion */ 2639 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2640 2641 /* set coloring for off-diagonal portion */ 2642 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2643 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2644 for (i=0; i<a->B->n; i++) { 2645 colors[i] = allcolors[a->garray[i]]; 2646 } 2647 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2648 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2649 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2650 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2651 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2652 ISColoringValue *colors; 2653 PetscInt *larray; 2654 ISColoring ocoloring; 2655 2656 /* set coloring for diagonal portion */ 2657 ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2658 for (i=0; i<a->A->n; i++) { 2659 larray[i] = i + a->cstart; 2660 } 2661 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2662 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2663 for (i=0; i<a->A->n; i++) { 2664 colors[i] = coloring->colors[larray[i]]; 2665 } 2666 ierr = PetscFree(larray);CHKERRQ(ierr); 2667 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2668 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2669 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2670 2671 /* set coloring for off-diagonal portion */ 2672 ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2673 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2674 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2675 for (i=0; i<a->B->n; i++) { 2676 colors[i] = coloring->colors[larray[i]]; 2677 } 2678 ierr = PetscFree(larray);CHKERRQ(ierr); 2679 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2680 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2681 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2682 } else { 2683 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 2684 } 2685 2686 PetscFunctionReturn(0); 2687 } 2688 2689 #if defined(PETSC_HAVE_ADIC) 2690 #undef __FUNCT__ 2691 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2692 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2693 { 2694 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2695 PetscErrorCode ierr; 2696 2697 PetscFunctionBegin; 2698 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2699 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2700 PetscFunctionReturn(0); 2701 } 2702 #endif 2703 2704 #undef __FUNCT__ 2705 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2706 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 2707 { 2708 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2709 PetscErrorCode ierr; 2710 2711 PetscFunctionBegin; 2712 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2713 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2714 PetscFunctionReturn(0); 2715 } 2716 2717 #undef __FUNCT__ 2718 #define __FUNCT__ "MatMerge" 2719 /*@C 2720 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2721 matrices from each processor 2722 2723 Collective on MPI_Comm 2724 2725 Input Parameters: 2726 + comm - the communicators the parallel matrix will live on 2727 . inmat - the input sequential matrices 2728 . n - number of local columns (or PETSC_DECIDE) 2729 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2730 2731 Output Parameter: 2732 . outmat - the parallel matrix generated 2733 2734 Level: advanced 2735 2736 Notes: The number of columns of the matrix in EACH processor MUST be the same. 2737 2738 @*/ 2739 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2740 { 2741 PetscErrorCode ierr; 2742 PetscInt m,N,i,rstart,nnz,I,*dnz,*onz; 2743 PetscInt *indx; 2744 PetscScalar *values; 2745 PetscMap columnmap,rowmap; 2746 2747 PetscFunctionBegin; 2748 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2749 /* 2750 PetscMPIInt rank; 2751 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2752 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N); 2753 */ 2754 if (scall == MAT_INITIAL_MATRIX){ 2755 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2756 if (n == PETSC_DECIDE){ 2757 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2758 ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr); 2759 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2760 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2761 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2762 } 2763 2764 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2765 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2766 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2767 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2768 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2769 2770 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2771 for (i=0;i<m;i++) { 2772 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2773 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2774 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2775 } 2776 /* This routine will ONLY return MPIAIJ type matrix */ 2777 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr); 2778 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2779 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2780 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2781 2782 } else if (scall == MAT_REUSE_MATRIX){ 2783 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2784 } else { 2785 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 2786 } 2787 2788 for (i=0;i<m;i++) { 2789 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2790 I = i + rstart; 2791 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2792 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2793 } 2794 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2795 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2796 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2797 2798 PetscFunctionReturn(0); 2799 } 2800 2801 #undef __FUNCT__ 2802 #define __FUNCT__ "MatFileSplit" 2803 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2804 { 2805 PetscErrorCode ierr; 2806 PetscMPIInt rank; 2807 PetscInt m,N,i,rstart,nnz; 2808 size_t len; 2809 const PetscInt *indx; 2810 PetscViewer out; 2811 char *name; 2812 Mat B; 2813 const PetscScalar *values; 2814 2815 PetscFunctionBegin; 2816 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2817 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2818 /* Should this be the type of the diagonal block of A? */ 2819 ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr); 2820 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2821 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2822 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2823 for (i=0;i<m;i++) { 2824 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2825 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2826 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2827 } 2828 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2829 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2830 2831 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2832 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2833 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2834 sprintf(name,"%s.%d",outfile,rank); 2835 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr); 2836 ierr = PetscFree(name); 2837 ierr = MatView(B,out);CHKERRQ(ierr); 2838 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2839 ierr = MatDestroy(B);CHKERRQ(ierr); 2840 PetscFunctionReturn(0); 2841 } 2842 2843 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 2844 #undef __FUNCT__ 2845 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 2846 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 2847 { 2848 PetscErrorCode ierr; 2849 Mat_Merge_SeqsToMPI *merge; 2850 PetscObjectContainer container; 2851 2852 PetscFunctionBegin; 2853 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2854 if (container) { 2855 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2856 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 2857 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 2858 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 2859 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 2860 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 2861 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 2862 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 2863 ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr); 2864 if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);} 2865 if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);} 2866 if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);} 2867 2868 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 2869 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 2870 } 2871 ierr = PetscFree(merge);CHKERRQ(ierr); 2872 2873 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 2874 PetscFunctionReturn(0); 2875 } 2876 2877 #include "src/mat/utils/freespace.h" 2878 #include "petscbt.h" 2879 #undef __FUNCT__ 2880 #define __FUNCT__ "MatMerge_SeqsToMPI" 2881 /*@C 2882 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 2883 matrices from each processor 2884 2885 Collective on MPI_Comm 2886 2887 Input Parameters: 2888 + comm - the communicators the parallel matrix will live on 2889 . seqmat - the input sequential matrices 2890 . m - number of local rows (or PETSC_DECIDE) 2891 . n - number of local columns (or PETSC_DECIDE) 2892 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2893 2894 Output Parameter: 2895 . mpimat - the parallel matrix generated 2896 2897 Level: advanced 2898 2899 Notes: 2900 The dimensions of the sequential matrix in each processor MUST be the same. 2901 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 2902 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 2903 @*/ 2904 static PetscEvent logkey_seqstompinum = 0; 2905 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 2906 { 2907 PetscErrorCode ierr; 2908 MPI_Comm comm=mpimat->comm; 2909 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2910 PetscMPIInt size,rank,taga,*len_s; 2911 PetscInt N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j; 2912 PetscInt proc,m; 2913 PetscInt **buf_ri,**buf_rj; 2914 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 2915 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 2916 MPI_Request *s_waits,*r_waits; 2917 MPI_Status *status; 2918 MatScalar *aa=a->a,**abuf_r,*ba_i; 2919 Mat_Merge_SeqsToMPI *merge; 2920 PetscObjectContainer container; 2921 2922 PetscFunctionBegin; 2923 if (!logkey_seqstompinum) { 2924 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 2925 } 2926 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 2927 2928 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2929 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2930 2931 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2932 if (container) { 2933 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2934 } 2935 bi = merge->bi; 2936 bj = merge->bj; 2937 buf_ri = merge->buf_ri; 2938 buf_rj = merge->buf_rj; 2939 2940 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 2941 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 2942 len_s = merge->len_s; 2943 2944 /* send and recv matrix values */ 2945 /*-----------------------------*/ 2946 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 2947 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 2948 2949 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 2950 for (proc=0,k=0; proc<size; proc++){ 2951 if (!len_s[proc]) continue; 2952 i = owners[proc]; 2953 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 2954 k++; 2955 } 2956 2957 ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr); 2958 ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr); 2959 ierr = PetscFree(status);CHKERRQ(ierr); 2960 2961 ierr = PetscFree(s_waits);CHKERRQ(ierr); 2962 ierr = PetscFree(r_waits);CHKERRQ(ierr); 2963 2964 /* insert mat values of mpimat */ 2965 /*----------------------------*/ 2966 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 2967 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 2968 nextrow = buf_ri_k + merge->nrecv; 2969 nextai = nextrow + merge->nrecv; 2970 2971 for (k=0; k<merge->nrecv; k++){ 2972 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 2973 nrows = *(buf_ri_k[k]); 2974 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 2975 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 2976 } 2977 2978 /* set values of ba */ 2979 ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); 2980 for (i=0; i<m; i++) { 2981 arow = owners[rank] + i; 2982 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 2983 bnzi = bi[i+1] - bi[i]; 2984 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 2985 2986 /* add local non-zero vals of this proc's seqmat into ba */ 2987 anzi = ai[arow+1] - ai[arow]; 2988 aj = a->j + ai[arow]; 2989 aa = a->a + ai[arow]; 2990 nextaj = 0; 2991 for (j=0; nextaj<anzi; j++){ 2992 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 2993 ba_i[j] += aa[nextaj++]; 2994 } 2995 } 2996 2997 /* add received vals into ba */ 2998 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 2999 /* i-th row */ 3000 if (i == *nextrow[k]) { 3001 anzi = *(nextai[k]+1) - *nextai[k]; 3002 aj = buf_rj[k] + *(nextai[k]); 3003 aa = abuf_r[k] + *(nextai[k]); 3004 nextaj = 0; 3005 for (j=0; nextaj<anzi; j++){ 3006 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3007 ba_i[j] += aa[nextaj++]; 3008 } 3009 } 3010 nextrow[k]++; nextai[k]++; 3011 } 3012 } 3013 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3014 } 3015 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3016 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3017 3018 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3019 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3020 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3021 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3022 PetscFunctionReturn(0); 3023 } 3024 static PetscEvent logkey_seqstompisym = 0; 3025 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3026 { 3027 PetscErrorCode ierr; 3028 Mat B_mpi; 3029 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3030 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3031 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3032 PetscInt M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j; 3033 PetscInt len,proc,*dnz,*onz; 3034 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3035 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3036 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3037 MPI_Status *status; 3038 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3039 PetscBT lnkbt; 3040 Mat_Merge_SeqsToMPI *merge; 3041 PetscObjectContainer container; 3042 3043 PetscFunctionBegin; 3044 if (!logkey_seqstompisym) { 3045 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3046 } 3047 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3048 3049 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3050 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3051 3052 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3053 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3054 3055 /* determine row ownership */ 3056 /*---------------------------------------------------------*/ 3057 ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr); 3058 if (m == PETSC_DECIDE) { 3059 ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr); 3060 } else { 3061 ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 3062 } 3063 ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr); 3064 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3065 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3066 3067 if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); } 3068 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 3069 3070 /* determine the number of messages to send, their lengths */ 3071 /*---------------------------------------------------------*/ 3072 len_s = merge->len_s; 3073 3074 len = 0; /* length of buf_si[] */ 3075 merge->nsend = 0; 3076 for (proc=0; proc<size; proc++){ 3077 len_si[proc] = 0; 3078 if (proc == rank){ 3079 len_s[proc] = 0; 3080 } else { 3081 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3082 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3083 } 3084 if (len_s[proc]) { 3085 merge->nsend++; 3086 nrows = 0; 3087 for (i=owners[proc]; i<owners[proc+1]; i++){ 3088 if (ai[i+1] > ai[i]) nrows++; 3089 } 3090 len_si[proc] = 2*(nrows+1); 3091 len += len_si[proc]; 3092 } 3093 } 3094 3095 /* determine the number and length of messages to receive for ij-structure */ 3096 /*-------------------------------------------------------------------------*/ 3097 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3098 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3099 3100 /* post the Irecv of j-structure */ 3101 /*-------------------------------*/ 3102 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr); 3103 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3104 3105 /* post the Isend of j-structure */ 3106 /*--------------------------------*/ 3107 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3108 sj_waits = si_waits + merge->nsend; 3109 3110 for (proc=0, k=0; proc<size; proc++){ 3111 if (!len_s[proc]) continue; 3112 i = owners[proc]; 3113 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3114 k++; 3115 } 3116 3117 /* receives and sends of j-structure are complete */ 3118 /*------------------------------------------------*/ 3119 ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr); 3120 ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr); 3121 3122 /* send and recv i-structure */ 3123 /*---------------------------*/ 3124 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr); 3125 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3126 3127 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3128 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3129 for (proc=0,k=0; proc<size; proc++){ 3130 if (!len_s[proc]) continue; 3131 /* form outgoing message for i-structure: 3132 buf_si[0]: nrows to be sent 3133 [1:nrows]: row index (global) 3134 [nrows+1:2*nrows+1]: i-structure index 3135 */ 3136 /*-------------------------------------------*/ 3137 nrows = len_si[proc]/2 - 1; 3138 buf_si_i = buf_si + nrows+1; 3139 buf_si[0] = nrows; 3140 buf_si_i[0] = 0; 3141 nrows = 0; 3142 for (i=owners[proc]; i<owners[proc+1]; i++){ 3143 anzi = ai[i+1] - ai[i]; 3144 if (anzi) { 3145 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3146 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3147 nrows++; 3148 } 3149 } 3150 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3151 k++; 3152 buf_si += len_si[proc]; 3153 } 3154 3155 ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr); 3156 ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr); 3157 3158 ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 3159 for (i=0; i<merge->nrecv; i++){ 3160 ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 3161 } 3162 3163 ierr = PetscFree(len_si);CHKERRQ(ierr); 3164 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3165 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 3166 ierr = PetscFree(si_waits);CHKERRQ(ierr); 3167 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3168 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3169 ierr = PetscFree(status);CHKERRQ(ierr); 3170 3171 /* compute a local seq matrix in each processor */ 3172 /*----------------------------------------------*/ 3173 /* allocate bi array and free space for accumulating nonzero column info */ 3174 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3175 bi[0] = 0; 3176 3177 /* create and initialize a linked list */ 3178 nlnk = N+1; 3179 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3180 3181 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3182 len = 0; 3183 len = ai[owners[rank+1]] - ai[owners[rank]]; 3184 ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3185 current_space = free_space; 3186 3187 /* determine symbolic info for each local row */ 3188 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3189 nextrow = buf_ri_k + merge->nrecv; 3190 nextai = nextrow + merge->nrecv; 3191 for (k=0; k<merge->nrecv; k++){ 3192 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3193 nrows = *buf_ri_k[k]; 3194 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 3195 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3196 } 3197 3198 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3199 len = 0; 3200 for (i=0;i<m;i++) { 3201 bnzi = 0; 3202 /* add local non-zero cols of this proc's seqmat into lnk */ 3203 arow = owners[rank] + i; 3204 anzi = ai[arow+1] - ai[arow]; 3205 aj = a->j + ai[arow]; 3206 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3207 bnzi += nlnk; 3208 /* add received col data into lnk */ 3209 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3210 if (i == *nextrow[k]) { /* i-th row */ 3211 anzi = *(nextai[k]+1) - *nextai[k]; 3212 aj = buf_rj[k] + *nextai[k]; 3213 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3214 bnzi += nlnk; 3215 nextrow[k]++; nextai[k]++; 3216 } 3217 } 3218 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3219 3220 /* if free space is not available, make more free space */ 3221 if (current_space->local_remaining<bnzi) { 3222 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3223 nspacedouble++; 3224 } 3225 /* copy data into free space, then initialize lnk */ 3226 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3227 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3228 3229 current_space->array += bnzi; 3230 current_space->local_used += bnzi; 3231 current_space->local_remaining -= bnzi; 3232 3233 bi[i+1] = bi[i] + bnzi; 3234 } 3235 3236 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3237 3238 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3239 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3240 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3241 3242 /* create symbolic parallel matrix B_mpi */ 3243 /*---------------------------------------*/ 3244 if (n==PETSC_DECIDE) { 3245 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr); 3246 } else { 3247 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr); 3248 } 3249 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3250 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3251 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3252 3253 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3254 B_mpi->assembled = PETSC_FALSE; 3255 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3256 merge->bi = bi; 3257 merge->bj = bj; 3258 merge->buf_ri = buf_ri; 3259 merge->buf_rj = buf_rj; 3260 merge->coi = PETSC_NULL; 3261 merge->coj = PETSC_NULL; 3262 merge->owners_co = PETSC_NULL; 3263 3264 /* attach the supporting struct to B_mpi for reuse */ 3265 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3266 ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr); 3267 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3268 *mpimat = B_mpi; 3269 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3270 PetscFunctionReturn(0); 3271 } 3272 3273 static PetscEvent logkey_seqstompi = 0; 3274 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3275 { 3276 PetscErrorCode ierr; 3277 3278 PetscFunctionBegin; 3279 if (!logkey_seqstompi) { 3280 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3281 } 3282 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3283 if (scall == MAT_INITIAL_MATRIX){ 3284 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3285 } 3286 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3287 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3288 PetscFunctionReturn(0); 3289 } 3290 static PetscEvent logkey_getlocalmat = 0; 3291 #undef __FUNCT__ 3292 #define __FUNCT__ "MatGetLocalMat" 3293 /*@C 3294 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3295 3296 Not Collective 3297 3298 Input Parameters: 3299 + A - the matrix 3300 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3301 3302 Output Parameter: 3303 . A_loc - the local sequential matrix generated 3304 3305 Level: developer 3306 3307 @*/ 3308 PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3309 { 3310 PetscErrorCode ierr; 3311 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3312 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3313 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3314 PetscScalar *aa=a->a,*ba=b->a,*ca; 3315 PetscInt am=A->m,i,j,k,cstart=mpimat->cstart; 3316 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3317 3318 PetscFunctionBegin; 3319 if (!logkey_getlocalmat) { 3320 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3321 } 3322 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3323 if (scall == MAT_INITIAL_MATRIX){ 3324 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 3325 ci[0] = 0; 3326 for (i=0; i<am; i++){ 3327 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3328 } 3329 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3330 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 3331 k = 0; 3332 for (i=0; i<am; i++) { 3333 ncols_o = bi[i+1] - bi[i]; 3334 ncols_d = ai[i+1] - ai[i]; 3335 /* off-diagonal portion of A */ 3336 for (jo=0; jo<ncols_o; jo++) { 3337 col = cmap[*bj]; 3338 if (col >= cstart) break; 3339 cj[k] = col; bj++; 3340 ca[k++] = *ba++; 3341 } 3342 /* diagonal portion of A */ 3343 for (j=0; j<ncols_d; j++) { 3344 cj[k] = cstart + *aj++; 3345 ca[k++] = *aa++; 3346 } 3347 /* off-diagonal portion of A */ 3348 for (j=jo; j<ncols_o; j++) { 3349 cj[k] = cmap[*bj++]; 3350 ca[k++] = *ba++; 3351 } 3352 } 3353 /* put together the new matrix */ 3354 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 3355 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3356 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3357 mat = (Mat_SeqAIJ*)(*A_loc)->data; 3358 mat->freedata = PETSC_TRUE; 3359 mat->nonew = 0; 3360 } else if (scall == MAT_REUSE_MATRIX){ 3361 mat=(Mat_SeqAIJ*)(*A_loc)->data; 3362 ci = mat->i; cj = mat->j; ca = mat->a; 3363 for (i=0; i<am; i++) { 3364 /* off-diagonal portion of A */ 3365 ncols_o = bi[i+1] - bi[i]; 3366 for (jo=0; jo<ncols_o; jo++) { 3367 col = cmap[*bj]; 3368 if (col >= cstart) break; 3369 *ca++ = *ba++; bj++; 3370 } 3371 /* diagonal portion of A */ 3372 ncols_d = ai[i+1] - ai[i]; 3373 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 3374 /* off-diagonal portion of A */ 3375 for (j=jo; j<ncols_o; j++) { 3376 *ca++ = *ba++; bj++; 3377 } 3378 } 3379 } else { 3380 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3381 } 3382 3383 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3384 PetscFunctionReturn(0); 3385 } 3386 3387 static PetscEvent logkey_getlocalmatcondensed = 0; 3388 #undef __FUNCT__ 3389 #define __FUNCT__ "MatGetLocalMatCondensed" 3390 /*@C 3391 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 3392 3393 Not Collective 3394 3395 Input Parameters: 3396 + A - the matrix 3397 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3398 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 3399 3400 Output Parameter: 3401 . A_loc - the local sequential matrix generated 3402 3403 Level: developer 3404 3405 @*/ 3406 PetscErrorCode MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 3407 { 3408 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3409 PetscErrorCode ierr; 3410 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 3411 IS isrowa,iscola; 3412 Mat *aloc; 3413 3414 PetscFunctionBegin; 3415 if (!logkey_getlocalmatcondensed) { 3416 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE); 3417 } 3418 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3419 if (!row){ 3420 start = a->rstart; end = a->rend; 3421 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 3422 } else { 3423 isrowa = *row; 3424 } 3425 if (!col){ 3426 start = a->cstart; 3427 cmap = a->garray; 3428 nzA = a->A->n; 3429 nzB = a->B->n; 3430 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3431 ncols = 0; 3432 for (i=0; i<nzB; i++) { 3433 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3434 else break; 3435 } 3436 imark = i; 3437 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 3438 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 3439 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 3440 ierr = PetscFree(idx);CHKERRQ(ierr); 3441 } else { 3442 iscola = *col; 3443 } 3444 if (scall != MAT_INITIAL_MATRIX){ 3445 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 3446 aloc[0] = *A_loc; 3447 } 3448 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 3449 *A_loc = aloc[0]; 3450 ierr = PetscFree(aloc);CHKERRQ(ierr); 3451 if (!row){ 3452 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 3453 } 3454 if (!col){ 3455 ierr = ISDestroy(iscola);CHKERRQ(ierr); 3456 } 3457 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3458 PetscFunctionReturn(0); 3459 } 3460 3461 static PetscEvent logkey_GetBrowsOfAcols = 0; 3462 #undef __FUNCT__ 3463 #define __FUNCT__ "MatGetBrowsOfAcols" 3464 /*@C 3465 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 3466 3467 Collective on Mat 3468 3469 Input Parameters: 3470 + A,B - the matrices in mpiaij format 3471 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3472 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 3473 3474 Output Parameter: 3475 + rowb, colb - index sets of rows and columns of B to extract 3476 . brstart - row index of B_seq from which next B->m rows are taken from B's local rows 3477 - B_seq - the sequential matrix generated 3478 3479 Level: developer 3480 3481 @*/ 3482 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 3483 { 3484 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3485 PetscErrorCode ierr; 3486 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 3487 IS isrowb,iscolb; 3488 Mat *bseq; 3489 3490 PetscFunctionBegin; 3491 if (a->cstart != b->rstart || a->cend != b->rend){ 3492 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend); 3493 } 3494 if (!logkey_GetBrowsOfAcols) { 3495 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 3496 } 3497 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3498 3499 if (scall == MAT_INITIAL_MATRIX){ 3500 start = a->cstart; 3501 cmap = a->garray; 3502 nzA = a->A->n; 3503 nzB = a->B->n; 3504 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3505 ncols = 0; 3506 for (i=0; i<nzB; i++) { /* row < local row index */ 3507 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3508 else break; 3509 } 3510 imark = i; 3511 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 3512 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 3513 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 3514 ierr = PetscFree(idx);CHKERRQ(ierr); 3515 *brstart = imark; 3516 ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr); 3517 } else { 3518 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 3519 isrowb = *rowb; iscolb = *colb; 3520 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 3521 bseq[0] = *B_seq; 3522 } 3523 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 3524 *B_seq = bseq[0]; 3525 ierr = PetscFree(bseq);CHKERRQ(ierr); 3526 if (!rowb){ 3527 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 3528 } else { 3529 *rowb = isrowb; 3530 } 3531 if (!colb){ 3532 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 3533 } else { 3534 *colb = iscolb; 3535 } 3536 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3537 PetscFunctionReturn(0); 3538 } 3539 3540 static PetscEvent logkey_GetBrowsOfAocols = 0; 3541 #undef __FUNCT__ 3542 #define __FUNCT__ "MatGetBrowsOfAoCols" 3543 /*@C 3544 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 3545 of the OFF-DIAGONAL portion of local A 3546 3547 Collective on Mat 3548 3549 Input Parameters: 3550 + A,B - the matrices in mpiaij format 3551 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3552 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 3553 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 3554 3555 Output Parameter: 3556 + B_oth - the sequential matrix generated 3557 3558 Level: developer 3559 3560 @*/ 3561 PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 3562 { 3563 VecScatter_MPI_General *gen_to,*gen_from; 3564 PetscErrorCode ierr; 3565 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3566 Mat_SeqAIJ *b_oth; 3567 VecScatter ctx=a->Mvctx; 3568 MPI_Comm comm=ctx->comm; 3569 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank; 3570 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj; 3571 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 3572 PetscInt i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 3573 MPI_Request *rwaits,*swaits; 3574 MPI_Status *sstatus,rstatus; 3575 PetscInt *cols; 3576 PetscScalar *vals; 3577 PetscMPIInt j; 3578 3579 PetscFunctionBegin; 3580 if (a->cstart != b->rstart || a->cend != b->rend){ 3581 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend); 3582 } 3583 if (!logkey_GetBrowsOfAocols) { 3584 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE); 3585 } 3586 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3587 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3588 3589 gen_to = (VecScatter_MPI_General*)ctx->todata; 3590 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 3591 rvalues = gen_from->values; /* holds the length of sending row */ 3592 svalues = gen_to->values; /* holds the length of receiving row */ 3593 nrecvs = gen_from->n; 3594 nsends = gen_to->n; 3595 rwaits = gen_from->requests; 3596 swaits = gen_to->requests; 3597 srow = gen_to->indices; /* local row index to be sent */ 3598 rstarts = gen_from->starts; 3599 sstarts = gen_to->starts; 3600 rprocs = gen_from->procs; 3601 sprocs = gen_to->procs; 3602 sstatus = gen_to->sstatus; 3603 3604 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 3605 if (scall == MAT_INITIAL_MATRIX){ 3606 /* i-array */ 3607 /*---------*/ 3608 /* post receives */ 3609 for (i=0; i<nrecvs; i++){ 3610 rowlen = (PetscInt*)rvalues + rstarts[i]; 3611 nrows = rstarts[i+1]-rstarts[i]; 3612 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3613 } 3614 3615 /* pack the outgoing message */ 3616 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 3617 rstartsj = sstartsj + nsends +1; 3618 sstartsj[0] = 0; rstartsj[0] = 0; 3619 len = 0; /* total length of j or a array to be sent */ 3620 k = 0; 3621 for (i=0; i<nsends; i++){ 3622 rowlen = (PetscInt*)svalues + sstarts[i]; 3623 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3624 for (j=0; j<nrows; j++) { 3625 row = srow[k] + b->rowners[rank]; /* global row idx */ 3626 ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 3627 len += rowlen[j]; 3628 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 3629 k++; 3630 } 3631 ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3632 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 3633 } 3634 /* recvs and sends of i-array are completed */ 3635 i = nrecvs; 3636 while (i--) { 3637 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3638 } 3639 if (nsends) { 3640 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr); 3641 } 3642 /* allocate buffers for sending j and a arrays */ 3643 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 3644 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 3645 3646 /* create i-array of B_oth */ 3647 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 3648 b_othi[0] = 0; 3649 len = 0; /* total length of j or a array to be received */ 3650 k = 0; 3651 for (i=0; i<nrecvs; i++){ 3652 rowlen = (PetscInt*)rvalues + rstarts[i]; 3653 nrows = rstarts[i+1]-rstarts[i]; 3654 for (j=0; j<nrows; j++) { 3655 b_othi[k+1] = b_othi[k] + rowlen[j]; 3656 len += rowlen[j]; k++; 3657 } 3658 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 3659 } 3660 3661 /* allocate space for j and a arrrays of B_oth */ 3662 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 3663 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr); 3664 3665 /* j-array */ 3666 /*---------*/ 3667 /* post receives of j-array */ 3668 for (i=0; i<nrecvs; i++){ 3669 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3670 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3671 } 3672 k = 0; 3673 for (i=0; i<nsends; i++){ 3674 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3675 bufJ = bufj+sstartsj[i]; 3676 for (j=0; j<nrows; j++) { 3677 row = srow[k++] + b->rowners[rank]; /* global row idx */ 3678 ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3679 for (l=0; l<ncols; l++){ 3680 *bufJ++ = cols[l]; 3681 } 3682 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3683 } 3684 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3685 } 3686 3687 /* recvs and sends of j-array are completed */ 3688 i = nrecvs; 3689 while (i--) { 3690 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3691 } 3692 if (nsends) { 3693 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr); 3694 } 3695 } else if (scall == MAT_REUSE_MATRIX){ 3696 sstartsj = *startsj; 3697 rstartsj = sstartsj + nsends +1; 3698 bufa = *bufa_ptr; 3699 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 3700 b_otha = b_oth->a; 3701 } else { 3702 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 3703 } 3704 3705 /* a-array */ 3706 /*---------*/ 3707 /* post receives of a-array */ 3708 for (i=0; i<nrecvs; i++){ 3709 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3710 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3711 } 3712 k = 0; 3713 for (i=0; i<nsends; i++){ 3714 nrows = sstarts[i+1]-sstarts[i]; 3715 bufA = bufa+sstartsj[i]; 3716 for (j=0; j<nrows; j++) { 3717 row = srow[k++] + b->rowners[rank]; /* global row idx */ 3718 ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3719 for (l=0; l<ncols; l++){ 3720 *bufA++ = vals[l]; 3721 } 3722 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3723 3724 } 3725 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3726 } 3727 /* recvs and sends of a-array are completed */ 3728 i = nrecvs; 3729 while (i--) { 3730 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3731 } 3732 if (nsends) { 3733 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr); 3734 } 3735 3736 if (scall == MAT_INITIAL_MATRIX){ 3737 /* put together the new matrix */ 3738 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 3739 3740 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3741 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3742 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 3743 b_oth->freedata = PETSC_TRUE; 3744 b_oth->nonew = 0; 3745 3746 ierr = PetscFree(bufj);CHKERRQ(ierr); 3747 if (!startsj || !bufa_ptr){ 3748 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 3749 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 3750 } else { 3751 *startsj = sstartsj; 3752 *bufa_ptr = bufa; 3753 } 3754 } 3755 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3756 3757 PetscFunctionReturn(0); 3758 } 3759 3760 /*MC 3761 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 3762 3763 Options Database Keys: 3764 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 3765 3766 Level: beginner 3767 3768 .seealso: MatCreateMPIAIJ 3769 M*/ 3770 3771 EXTERN_C_BEGIN 3772 #undef __FUNCT__ 3773 #define __FUNCT__ "MatCreate_MPIAIJ" 3774 PetscErrorCode MatCreate_MPIAIJ(Mat B) 3775 { 3776 Mat_MPIAIJ *b; 3777 PetscErrorCode ierr; 3778 PetscInt i; 3779 PetscMPIInt size; 3780 3781 PetscFunctionBegin; 3782 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 3783 3784 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 3785 B->data = (void*)b; 3786 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3787 B->factor = 0; 3788 B->bs = 1; 3789 B->assembled = PETSC_FALSE; 3790 B->mapping = 0; 3791 3792 B->insertmode = NOT_SET_VALUES; 3793 b->size = size; 3794 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 3795 3796 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 3797 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 3798 3799 /* the information in the maps duplicates the information computed below, eventually 3800 we should remove the duplicate information that is not contained in the maps */ 3801 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 3802 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 3803 3804 /* build local table of row and column ownerships */ 3805 ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 3806 ierr = PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 3807 b->cowners = b->rowners + b->size + 2; 3808 ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3809 b->rowners[0] = 0; 3810 for (i=2; i<=b->size; i++) { 3811 b->rowners[i] += b->rowners[i-1]; 3812 } 3813 b->rstart = b->rowners[b->rank]; 3814 b->rend = b->rowners[b->rank+1]; 3815 ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3816 b->cowners[0] = 0; 3817 for (i=2; i<=b->size; i++) { 3818 b->cowners[i] += b->cowners[i-1]; 3819 } 3820 b->cstart = b->cowners[b->rank]; 3821 b->cend = b->cowners[b->rank+1]; 3822 3823 /* build cache for off array entries formed */ 3824 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 3825 b->donotstash = PETSC_FALSE; 3826 b->colmap = 0; 3827 b->garray = 0; 3828 b->roworiented = PETSC_TRUE; 3829 3830 /* stuff used for matrix vector multiply */ 3831 b->lvec = PETSC_NULL; 3832 b->Mvctx = PETSC_NULL; 3833 3834 /* stuff for MatGetRow() */ 3835 b->rowindices = 0; 3836 b->rowvalues = 0; 3837 b->getrowactive = PETSC_FALSE; 3838 3839 /* Explicitly create 2 MATSEQAIJ matrices. */ 3840 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 3841 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3842 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3843 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 3844 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3845 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3846 3847 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3848 "MatStoreValues_MPIAIJ", 3849 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 3850 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3851 "MatRetrieveValues_MPIAIJ", 3852 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 3853 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3854 "MatGetDiagonalBlock_MPIAIJ", 3855 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 3856 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3857 "MatIsTranspose_MPIAIJ", 3858 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 3859 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 3860 "MatMPIAIJSetPreallocation_MPIAIJ", 3861 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 3862 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 3863 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 3864 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 3865 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3866 "MatDiagonalScaleLocal_MPIAIJ", 3867 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 3868 PetscFunctionReturn(0); 3869 } 3870 EXTERN_C_END 3871