#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/inline/spops.h"

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable
   at a slightly higher hash-table cost; without it, it is not scalable
   (each processor has an order-N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15

/*
   Insert (row,col,value) into the local "diagonal" block a: binary-search the row for the
   column and add/insert in place if found; otherwise enlarge the matrix storage by
   CHUNKSIZE to make room in the row before inserting.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      PetscInt    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(am+1)*sizeof(PetscInt); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (PetscInt*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                         len*sizeof(PetscInt));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + 
shift; \ 94 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 95 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \ 96 a->maxnz += CHUNKSIZE; \ 97 a->reallocs++; \ 98 } \ 99 N = nrow++ - 1; a->nz++; \ 100 /* shift up all the later entries in this row */ \ 101 for (ii=N; ii>=_i; ii--) { \ 102 rp[ii+1] = rp[ii]; \ 103 ap[ii+1] = ap[ii]; \ 104 } \ 105 rp[_i] = col1; \ 106 ap[_i] = value; \ 107 a_noinsert: ; \ 108 ailen[row] = nrow; \ 109 } 110 111 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 112 { \ 113 \ 114 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 115 rmax = bimax[row]; nrow = bilen[row]; \ 116 col1 = col - shift; \ 117 \ 118 low = 0; high = nrow; \ 119 while (high-low > 5) { \ 120 t = (low+high)/2; \ 121 if (rp[t] > col) high = t; \ 122 else low = t; \ 123 } \ 124 for (_i=low; _i<high; _i++) { \ 125 if (rp[_i] > col1) break; \ 126 if (rp[_i] == col1) { \ 127 if (addv == ADD_VALUES) ap[_i] += value; \ 128 else ap[_i] = value; \ 129 goto b_noinsert; \ 130 } \ 131 } \ 132 if (value == 0.0 && ignorezeroentries) goto b_noinsert; \ 133 if (nonew == 1) goto b_noinsert; \ 134 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 135 if (nrow >= rmax) { \ 136 /* there is no extra room in row, therefore enlarge */ \ 137 PetscInt new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 138 PetscScalar *new_a; \ 139 \ 140 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \ 141 \ 142 /* malloc new storage space */ \ 143 len = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(bm+1)*sizeof(PetscInt); \ 144 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 145 new_j = (PetscInt*)(new_a + new_nz); \ 146 new_i = new_j + new_nz; \ 147 \ 148 /* copy over old data into new slots */ \ 149 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 150 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 151 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \ 152 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 153 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 154 len*sizeof(PetscInt));CHKERRQ(ierr); \ 155 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 156 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 157 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 158 /* free up old matrix storage */ \ 159 \ 160 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 161 if (!b->singlemalloc) { \ 162 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 163 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 164 } \ 165 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 166 b->singlemalloc = PETSC_TRUE; \ 167 \ 168 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 169 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 170 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \ 171 b->maxnz += CHUNKSIZE; \ 172 b->reallocs++; \ 173 } \ 174 N = nrow++ - 1; b->nz++; \ 175 /* shift up all the later entries in this row */ \ 176 for (ii=N; ii>=_i; ii--) { \ 177 rp[ii+1] = rp[ii]; \ 178 ap[ii+1] = ap[ii]; \ 179 } \ 180 rp[_i] = col1; \ 181 ap[_i] = value; \ 182 b_noinsert: ; \ 183 bilen[row] = nrow; \ 184 } 185 186 #undef __FUNCT__ 187 #define __FUNCT__ "MatSetValues_MPIAIJ" 188 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 189 { 190 Mat_MPIAIJ 
*aij = (Mat_MPIAIJ*)mat->data; 191 PetscScalar value; 192 PetscErrorCode ierr; 193 PetscInt i,j,rstart = aij->rstart,rend = aij->rend; 194 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 195 PetscTruth roworiented = aij->roworiented; 196 197 /* Some Variables required in the macro */ 198 Mat A = aij->A; 199 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 200 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 201 PetscScalar *aa = a->a; 202 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 203 Mat B = aij->B; 204 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 205 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 206 PetscScalar *ba = b->a; 207 208 PetscInt *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 209 PetscInt nonew = a->nonew,shift=0; 210 PetscScalar *ap; 211 212 PetscFunctionBegin; 213 for (i=0; i<m; i++) { 214 if (im[i] < 0) continue; 215 #if defined(PETSC_USE_DEBUG) 216 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 217 #endif 218 if (im[i] >= rstart && im[i] < rend) { 219 row = im[i] - rstart; 220 for (j=0; j<n; j++) { 221 if (in[j] >= cstart && in[j] < cend){ 222 col = in[j] - cstart; 223 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 224 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 225 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 226 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 227 } else if (in[j] < 0) continue; 228 #if defined(PETSC_USE_DEBUG) 229 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);} 230 #endif 231 else { 232 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 233 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 234 if (mat->was_assembled) { 235 if (!aij->colmap) { 236 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 237 } 238 #if defined (PETSC_USE_CTABLE) 239 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 240 col--; 241 #else 242 col = aij->colmap[in[j]] - 1; 243 #endif 244 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 245 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 246 col = in[j]; 247 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 248 B = aij->B; 249 b = (Mat_SeqAIJ*)B->data; 250 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 251 ba = b->a; 252 } 253 } else col = in[j]; 254 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 255 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 256 } 257 } 258 } else { 259 if (!aij->donotstash) { 260 if (roworiented) { 261 if (ignorezeroentries && v[i*n] == 0.0) continue; 262 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 263 } else { 264 if (ignorezeroentries && v[i] == 0.0) continue; 265 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 266 } 267 } 268 } 269 } 270 PetscFunctionReturn(0); 271 } 272 273 #undef __FUNCT__ 274 #define __FUNCT__ "MatGetValues_MPIAIJ" 275 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 276 { 277 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 278 PetscErrorCode ierr; 279 PetscInt i,j,rstart = aij->rstart,rend = aij->rend; 280 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 281 282 PetscFunctionBegin; 283 for (i=0; i<m; i++) { 284 if (idxm[i] 
< 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 285 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1); 286 if (idxm[i] >= rstart && idxm[i] < rend) { 287 row = idxm[i] - rstart; 288 for (j=0; j<n; j++) { 289 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 290 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1); 291 if (idxn[j] >= cstart && idxn[j] < cend){ 292 col = idxn[j] - cstart; 293 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 294 } else { 295 if (!aij->colmap) { 296 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 297 } 298 #if defined (PETSC_USE_CTABLE) 299 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 300 col --; 301 #else 302 col = aij->colmap[idxn[j]] - 1; 303 #endif 304 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 305 else { 306 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 307 } 308 } 309 } 310 } else { 311 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 312 } 313 } 314 PetscFunctionReturn(0); 315 } 316 317 #undef __FUNCT__ 318 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 319 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 320 { 321 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 322 PetscErrorCode ierr; 323 PetscInt nstash,reallocs; 324 InsertMode addv; 325 326 PetscFunctionBegin; 327 if (aij->donotstash) { 328 PetscFunctionReturn(0); 329 } 330 331 /* make sure all processors are either in INSERTMODE or ADDMODE */ 332 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 333 if (addv == (ADD_VALUES|INSERT_VALUES)) { 334 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 335 } 336 mat->insertmode = addv; /* in case this processor had no cache */ 337 338 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 339 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 340 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs); 341 PetscFunctionReturn(0); 342 } 343 344 #undef __FUNCT__ 345 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 346 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 347 { 348 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 349 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 350 PetscErrorCode ierr; 351 PetscMPIInt n; 352 PetscInt i,j,rstart,ncols,flg; 353 PetscInt *row,*col,other_disassembled; 354 PetscScalar *val; 355 InsertMode addv = mat->insertmode; 356 357 PetscFunctionBegin; 358 if (!aij->donotstash) { 359 while (1) { 360 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 361 if (!flg) break; 362 363 for (i=0; i<n;) { 364 /* Now identify the consecutive vals belonging to the same row */ 365 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 366 if (j < n) ncols = j-i; 367 else ncols = n-i; 368 /* Now assemble all these values with a single function call */ 369 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 370 i = j; 371 } 372 } 373 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 374 } 375 a->compressedrow.use = PETSC_FALSE; 376 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 377 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 378 379 /* determine if any processor has disassembled, if so we 
must 380 also disassemble ourselfs, in order that we may reassemble. */ 381 /* 382 if nonzero structure of submatrix B cannot change then we know that 383 no processor disassembled thus we can skip this stuff 384 */ 385 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 386 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 387 if (mat->was_assembled && !other_disassembled) { 388 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 389 /* reaccess the b because aij->B was changed */ 390 b = (Mat_SeqAIJ *)aij->B->data; 391 } 392 } 393 394 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 395 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 396 } 397 398 b->inode.use = PETSC_FALSE; 399 b->compressedrow.use = PETSC_TRUE; 400 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 401 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 402 403 if (aij->rowvalues) { 404 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 405 aij->rowvalues = 0; 406 } 407 408 /* used by MatAXPY() */ 409 a->xtoy = 0; b->xtoy = 0; 410 a->XtoY = 0; b->XtoY = 0; 411 412 PetscFunctionReturn(0); 413 } 414 415 #undef __FUNCT__ 416 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 417 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 418 { 419 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 420 PetscErrorCode ierr; 421 422 PetscFunctionBegin; 423 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 424 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 425 PetscFunctionReturn(0); 426 } 427 428 #undef __FUNCT__ 429 #define __FUNCT__ "MatZeroRows_MPIAIJ" 430 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag) 431 { 432 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 433 PetscErrorCode ierr; 434 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = A->tag; 435 PetscInt i,N,*rows,*owners = l->rowners; 436 PetscInt *nprocs,j,idx,nsends,row; 437 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 438 PetscInt *rvalues,count,base,slen,*source; 439 PetscInt *lens,*lrows,*values,rstart=l->rstart; 440 MPI_Comm comm = A->comm; 441 MPI_Request *send_waits,*recv_waits; 442 MPI_Status recv_status,*send_status; 443 IS istmp; 444 PetscTruth found; 445 446 PetscFunctionBegin; 447 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 448 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 449 450 /* first count number of contributors to each processor */ 451 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 452 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 453 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 454 for (i=0; i<N; i++) { 455 idx = rows[i]; 456 found = PETSC_FALSE; 457 for (j=0; j<size; j++) { 458 if (idx >= owners[j] && idx < owners[j+1]) { 459 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 460 } 461 } 462 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 463 } 464 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 465 466 /* inform other processors of number of messages and max length*/ 467 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 468 469 /* post receives: */ 470 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 471 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 472 for (i=0; i<nrecvs; i++) { 473 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 474 } 475 476 /* do sends: 477 1) starts[i] gives the starting index in svalues for stuff going to 478 the ith processor 479 */ 480 ierr = 
PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 481 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 482 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 483 starts[0] = 0; 484 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 485 for (i=0; i<N; i++) { 486 svalues[starts[owner[i]]++] = rows[i]; 487 } 488 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 489 490 starts[0] = 0; 491 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 492 count = 0; 493 for (i=0; i<size; i++) { 494 if (nprocs[2*i+1]) { 495 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 496 } 497 } 498 ierr = PetscFree(starts);CHKERRQ(ierr); 499 500 base = owners[rank]; 501 502 /* wait on receives */ 503 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 504 source = lens + nrecvs; 505 count = nrecvs; slen = 0; 506 while (count) { 507 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 508 /* unpack receives into our local space */ 509 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 510 source[imdex] = recv_status.MPI_SOURCE; 511 lens[imdex] = n; 512 slen += n; 513 count--; 514 } 515 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 516 517 /* move the data into the send scatter */ 518 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 519 count = 0; 520 for (i=0; i<nrecvs; i++) { 521 values = rvalues + i*nmax; 522 for (j=0; j<lens[i]; j++) { 523 lrows[count++] = values[j] - base; 524 } 525 } 526 ierr = PetscFree(rvalues);CHKERRQ(ierr); 527 ierr = PetscFree(lens);CHKERRQ(ierr); 528 ierr = PetscFree(owner);CHKERRQ(ierr); 529 ierr = PetscFree(nprocs);CHKERRQ(ierr); 530 531 /* actually zap the local rows */ 532 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 533 PetscLogObjectParent(A,istmp); 534 535 /* 536 Zero the required rows. If the "diagonal block" of the matrix 537 is square and the user wishes to set the diagonal we use seperate 538 code so that MatSetValues() is not called for each diagonal allocating 539 new memory, thus calling lots of mallocs and slowing things down. 
540 541 Contributed by: Mathew Knepley 542 */ 543 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 544 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 545 if (diag && (l->A->M == l->A->N)) { 546 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 547 } else if (diag) { 548 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 549 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 550 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 551 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 552 } 553 for (i = 0; i < slen; i++) { 554 row = lrows[i] + rstart; 555 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 556 } 557 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 558 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 559 } else { 560 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 561 } 562 ierr = ISDestroy(istmp);CHKERRQ(ierr); 563 ierr = PetscFree(lrows);CHKERRQ(ierr); 564 565 /* wait on sends */ 566 if (nsends) { 567 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 568 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 569 ierr = PetscFree(send_status);CHKERRQ(ierr); 570 } 571 ierr = PetscFree(send_waits);CHKERRQ(ierr); 572 ierr = PetscFree(svalues);CHKERRQ(ierr); 573 574 PetscFunctionReturn(0); 575 } 576 577 #undef __FUNCT__ 578 #define __FUNCT__ "MatMult_MPIAIJ" 579 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 580 { 581 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 582 PetscErrorCode ierr; 583 PetscInt nt; 584 585 PetscFunctionBegin; 586 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 587 if (nt != A->n) { 588 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt); 589 } 590 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 591 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 592 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 593 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 594 PetscFunctionReturn(0); 595 } 596 597 #undef __FUNCT__ 598 #define __FUNCT__ "MatMultAdd_MPIAIJ" 599 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 600 { 601 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 602 PetscErrorCode ierr; 603 604 PetscFunctionBegin; 605 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 606 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 607 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 608 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 609 PetscFunctionReturn(0); 610 } 611 612 #undef __FUNCT__ 613 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 614 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 615 { 616 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 617 PetscErrorCode ierr; 618 619 PetscFunctionBegin; 620 /* do nondiagonal part */ 621 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 622 /* send it on its way */ 623 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 624 /* do local part */ 625 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 626 /* receive remote parts: note this assumes the values are not actually */ 627 /* inserted in yy until the next line, which is true for my implementation*/ 628 /* but is not perhaps always true. 
*/ 629 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 630 PetscFunctionReturn(0); 631 } 632 633 EXTERN_C_BEGIN 634 #undef __FUNCT__ 635 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 636 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth tol,PetscTruth *f) 637 { 638 MPI_Comm comm; 639 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 640 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 641 IS Me,Notme; 642 PetscErrorCode ierr; 643 PetscInt M,N,first,last,*notme,i; 644 PetscMPIInt size; 645 646 PetscFunctionBegin; 647 648 /* Easy test: symmetric diagonal block */ 649 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 650 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 651 if (!*f) PetscFunctionReturn(0); 652 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 653 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 654 if (size == 1) PetscFunctionReturn(0); 655 656 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 657 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 658 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 659 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 660 for (i=0; i<first; i++) notme[i] = i; 661 for (i=last; i<M; i++) notme[i-last+first] = i; 662 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 663 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 664 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 665 Aoff = Aoffs[0]; 666 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 667 Boff = Boffs[0]; 668 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 669 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 670 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 671 ierr = ISDestroy(Me);CHKERRQ(ierr); 672 ierr = ISDestroy(Notme);CHKERRQ(ierr); 673 674 PetscFunctionReturn(0); 675 } 676 EXTERN_C_END 677 678 #undef __FUNCT__ 679 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 680 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 681 { 682 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 683 PetscErrorCode ierr; 684 685 PetscFunctionBegin; 686 /* do nondiagonal part */ 687 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 688 /* send it on its way */ 689 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 690 /* do local part */ 691 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 692 /* receive remote parts: note this assumes the values are not actually */ 693 /* inserted in yy until the next line, which is true for my implementation*/ 694 /* but is not perhaps always true. 
*/ 695 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 696 PetscFunctionReturn(0); 697 } 698 699 /* 700 This only works correctly for square matrices where the subblock A->A is the 701 diagonal block 702 */ 703 #undef __FUNCT__ 704 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 705 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 706 { 707 PetscErrorCode ierr; 708 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 709 710 PetscFunctionBegin; 711 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 712 if (a->rstart != a->cstart || a->rend != a->cend) { 713 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 714 } 715 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 716 PetscFunctionReturn(0); 717 } 718 719 #undef __FUNCT__ 720 #define __FUNCT__ "MatScale_MPIAIJ" 721 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A) 722 { 723 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 724 PetscErrorCode ierr; 725 726 PetscFunctionBegin; 727 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 728 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 729 PetscFunctionReturn(0); 730 } 731 732 #undef __FUNCT__ 733 #define __FUNCT__ "MatDestroy_MPIAIJ" 734 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 735 { 736 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 737 PetscErrorCode ierr; 738 739 PetscFunctionBegin; 740 #if defined(PETSC_USE_LOG) 741 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N); 742 #endif 743 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 744 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 745 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 746 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 747 #if defined (PETSC_USE_CTABLE) 748 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 749 #else 750 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 751 #endif 752 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 753 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 754 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 755 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 756 ierr = PetscFree(aij);CHKERRQ(ierr); 757 758 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 759 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 760 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 761 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 762 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 763 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 764 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 765 PetscFunctionReturn(0); 766 } 767 768 #undef __FUNCT__ 769 #define __FUNCT__ "MatView_MPIAIJ_Binary" 770 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 771 { 772 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 773 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 774 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 775 PetscErrorCode ierr; 776 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 777 int fd; 778 PetscInt nz,header[4],*row_lengths,*range,rlen,i; 779 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 780 PetscScalar 
*column_values; 781 782 PetscFunctionBegin; 783 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 784 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 785 nz = A->nz + B->nz; 786 if (!rank) { 787 header[0] = MAT_FILE_COOKIE; 788 header[1] = mat->M; 789 header[2] = mat->N; 790 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 791 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 792 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 793 /* get largest number of rows any processor has */ 794 rlen = mat->m; 795 ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr); 796 for (i=1; i<size; i++) { 797 rlen = PetscMax(rlen,range[i+1] - range[i]); 798 } 799 } else { 800 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 801 rlen = mat->m; 802 } 803 804 /* load up the local row counts */ 805 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 806 for (i=0; i<mat->m; i++) { 807 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 808 } 809 810 /* store the row lengths to the file */ 811 if (!rank) { 812 MPI_Status status; 813 ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 814 for (i=1; i<size; i++) { 815 rlen = range[i+1] - range[i]; 816 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 817 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 818 } 819 } else { 820 ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 821 } 822 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 823 824 /* load up the local column indices */ 825 nzmax = nz; /* )th processor needs space a largest processor needs */ 826 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 827 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 828 cnt = 0; 829 for (i=0; i<mat->m; i++) { 830 for (j=B->i[i]; j<B->i[i+1]; j++) { 831 if ( (col = garray[B->j[j]]) > cstart) break; 832 column_indices[cnt++] = col; 833 } 834 for (k=A->i[i]; k<A->i[i+1]; k++) { 835 column_indices[cnt++] = A->j[k] + cstart; 836 } 837 for (; j<B->i[i+1]; j++) { 838 column_indices[cnt++] = garray[B->j[j]]; 839 } 840 } 841 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 842 843 /* store the column indices to the file */ 844 if (!rank) { 845 MPI_Status status; 846 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 847 for (i=1; i<size; i++) { 848 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 849 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 850 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 851 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 852 } 853 } else { 854 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 855 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 856 } 857 ierr = PetscFree(column_indices);CHKERRQ(ierr); 858 859 /* load up the local column values */ 860 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 861 cnt = 0; 862 for (i=0; i<mat->m; i++) { 863 for (j=B->i[i]; j<B->i[i+1]; j++) { 864 if ( garray[B->j[j]] > cstart) break; 865 column_values[cnt++] = B->a[j]; 866 } 867 for (k=A->i[i]; k<A->i[i+1]; k++) { 868 column_values[cnt++] = A->a[k]; 869 } 870 for (; 
j<B->i[i+1]; j++) { 871 column_values[cnt++] = B->a[j]; 872 } 873 } 874 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 875 876 /* store the column values to the file */ 877 if (!rank) { 878 MPI_Status status; 879 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 880 for (i=1; i<size; i++) { 881 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 882 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 883 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 884 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 885 } 886 } else { 887 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 888 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 889 } 890 ierr = PetscFree(column_values);CHKERRQ(ierr); 891 PetscFunctionReturn(0); 892 } 893 894 #undef __FUNCT__ 895 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 896 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 897 { 898 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 899 PetscErrorCode ierr; 900 PetscMPIInt rank = aij->rank,size = aij->size; 901 PetscTruth isdraw,iascii,flg,isbinary; 902 PetscViewer sviewer; 903 PetscViewerFormat format; 904 905 PetscFunctionBegin; 906 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 907 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 908 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 909 if (iascii) { 910 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 911 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 912 MatInfo info; 913 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 914 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 915 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 916 if (flg) { 917 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 918 rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 919 } else { 920 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 921 rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 922 } 923 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 924 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 925 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 926 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 927 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 928 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 929 PetscFunctionReturn(0); 930 } else if (format == PETSC_VIEWER_ASCII_INFO) { 931 PetscFunctionReturn(0); 932 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 933 PetscFunctionReturn(0); 934 } 935 } else if (isbinary) { 936 if (size == 1) { 937 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 938 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 939 } else { 940 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 941 } 942 PetscFunctionReturn(0); 943 } else if (isdraw) { 944 
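/* draw viewer: fetch the PetscDraw context first and return immediately if it is the null device, so no graphics work is done */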
PetscDraw draw; 945 PetscTruth isnull; 946 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 947 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 948 } 949 950 if (size == 1) { 951 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 952 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 953 } else { 954 /* assemble the entire matrix onto first processor. */ 955 Mat A; 956 Mat_SeqAIJ *Aloc; 957 PetscInt M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 958 PetscScalar *a; 959 960 if (!rank) { 961 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 962 } else { 963 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 964 } 965 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 966 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 967 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 968 PetscLogObjectParent(mat,A); 969 970 /* copy over the A part */ 971 Aloc = (Mat_SeqAIJ*)aij->A->data; 972 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 973 row = aij->rstart; 974 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 975 for (i=0; i<m; i++) { 976 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 977 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 978 } 979 aj = Aloc->j; 980 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 981 982 /* copy over the B part */ 983 Aloc = (Mat_SeqAIJ*)aij->B->data; 984 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 985 row = aij->rstart; 986 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 987 ct = cols; 988 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 989 for (i=0; i<m; i++) { 990 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 991 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 992 } 993 ierr = PetscFree(ct);CHKERRQ(ierr); 994 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 995 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 996 /* 997 Everyone has to call to draw the matrix since the graphics waits are 998 synchronized across all processors that share the PetscDraw object 999 */ 1000 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1001 if (!rank) { 1002 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 1003 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1004 } 1005 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1006 ierr = MatDestroy(A);CHKERRQ(ierr); 1007 } 1008 PetscFunctionReturn(0); 1009 } 1010 1011 #undef __FUNCT__ 1012 #define __FUNCT__ "MatView_MPIAIJ" 1013 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1014 { 1015 PetscErrorCode ierr; 1016 PetscTruth iascii,isdraw,issocket,isbinary; 1017 1018 PetscFunctionBegin; 1019 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1020 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1021 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1022 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1023 if (iascii || isdraw || isbinary || issocket) { 1024 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1025 } else { 1026 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1027 } 1028 PetscFunctionReturn(0); 1029 } 1030 1031 1032 1033 #undef 
__FUNCT__ 1034 #define __FUNCT__ "MatRelax_MPIAIJ" 1035 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1036 { 1037 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1038 PetscErrorCode ierr; 1039 Vec bb1; 1040 PetscScalar mone=-1.0; 1041 1042 PetscFunctionBegin; 1043 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1044 1045 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1046 1047 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1048 if (flag & SOR_ZERO_INITIAL_GUESS) { 1049 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1050 its--; 1051 } 1052 1053 while (its--) { 1054 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1055 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1056 1057 /* update rhs: bb1 = bb - B*x */ 1058 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1059 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1060 1061 /* local sweep */ 1062 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1063 CHKERRQ(ierr); 1064 } 1065 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1066 if (flag & SOR_ZERO_INITIAL_GUESS) { 1067 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1068 its--; 1069 } 1070 while (its--) { 1071 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1072 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1073 1074 /* update rhs: bb1 = bb - B*x */ 1075 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1076 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1077 1078 /* local sweep */ 1079 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1080 CHKERRQ(ierr); 1081 } 1082 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1083 if (flag & SOR_ZERO_INITIAL_GUESS) { 1084 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1085 its--; 1086 } 1087 while (its--) { 1088 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1089 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1090 1091 /* update rhs: bb1 = bb - B*x */ 1092 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1093 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1094 1095 /* local sweep */ 1096 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1097 CHKERRQ(ierr); 1098 } 1099 } else { 1100 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1101 } 1102 1103 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1104 PetscFunctionReturn(0); 1105 } 1106 1107 #undef __FUNCT__ 1108 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1109 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1110 { 1111 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1112 Mat A = mat->A,B = mat->B; 1113 PetscErrorCode ierr; 1114 PetscReal isend[5],irecv[5]; 1115 1116 PetscFunctionBegin; 1117 info->block_size = 1.0; 1118 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1119 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1120 isend[3] = info->memory; isend[4] = info->mallocs; 1121 ierr = 
MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1122 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1123 isend[3] += info->memory; isend[4] += info->mallocs; 1124 if (flag == MAT_LOCAL) { 1125 info->nz_used = isend[0]; 1126 info->nz_allocated = isend[1]; 1127 info->nz_unneeded = isend[2]; 1128 info->memory = isend[3]; 1129 info->mallocs = isend[4]; 1130 } else if (flag == MAT_GLOBAL_MAX) { 1131 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1132 info->nz_used = irecv[0]; 1133 info->nz_allocated = irecv[1]; 1134 info->nz_unneeded = irecv[2]; 1135 info->memory = irecv[3]; 1136 info->mallocs = irecv[4]; 1137 } else if (flag == MAT_GLOBAL_SUM) { 1138 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1139 info->nz_used = irecv[0]; 1140 info->nz_allocated = irecv[1]; 1141 info->nz_unneeded = irecv[2]; 1142 info->memory = irecv[3]; 1143 info->mallocs = irecv[4]; 1144 } 1145 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1146 info->fill_ratio_needed = 0; 1147 info->factor_mallocs = 0; 1148 info->rows_global = (double)matin->M; 1149 info->columns_global = (double)matin->N; 1150 info->rows_local = (double)matin->m; 1151 info->columns_local = (double)matin->N; 1152 1153 PetscFunctionReturn(0); 1154 } 1155 1156 #undef __FUNCT__ 1157 #define __FUNCT__ "MatSetOption_MPIAIJ" 1158 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op) 1159 { 1160 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1161 PetscErrorCode ierr; 1162 1163 PetscFunctionBegin; 1164 switch (op) { 1165 case MAT_NO_NEW_NONZERO_LOCATIONS: 1166 case MAT_YES_NEW_NONZERO_LOCATIONS: 1167 case MAT_COLUMNS_UNSORTED: 1168 case MAT_COLUMNS_SORTED: 1169 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1170 case MAT_KEEP_ZEROED_ROWS: 1171 case MAT_NEW_NONZERO_LOCATION_ERR: 1172 case MAT_USE_INODES: 1173 case MAT_DO_NOT_USE_INODES: 1174 case MAT_IGNORE_ZERO_ENTRIES: 1175 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1176 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1177 break; 1178 case MAT_ROW_ORIENTED: 1179 a->roworiented = PETSC_TRUE; 1180 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1181 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1182 break; 1183 case MAT_ROWS_SORTED: 1184 case MAT_ROWS_UNSORTED: 1185 case MAT_YES_NEW_DIAGONALS: 1186 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1187 break; 1188 case MAT_COLUMN_ORIENTED: 1189 a->roworiented = PETSC_FALSE; 1190 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1191 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1192 break; 1193 case MAT_IGNORE_OFF_PROC_ENTRIES: 1194 a->donotstash = PETSC_TRUE; 1195 break; 1196 case MAT_NO_NEW_DIAGONALS: 1197 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1198 case MAT_SYMMETRIC: 1199 case MAT_STRUCTURALLY_SYMMETRIC: 1200 case MAT_HERMITIAN: 1201 case MAT_SYMMETRY_ETERNAL: 1202 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1203 break; 1204 case MAT_NOT_SYMMETRIC: 1205 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1206 case MAT_NOT_HERMITIAN: 1207 case MAT_NOT_SYMMETRY_ETERNAL: 1208 break; 1209 default: 1210 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1211 } 1212 PetscFunctionReturn(0); 1213 } 1214 1215 #undef __FUNCT__ 1216 #define __FUNCT__ "MatGetRow_MPIAIJ" 1217 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1218 { 1219 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1220 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1221 PetscErrorCode ierr; 1222 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1223 PetscInt 
nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1224 PetscInt *cmap,*idx_p; 1225 1226 PetscFunctionBegin; 1227 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1228 mat->getrowactive = PETSC_TRUE; 1229 1230 if (!mat->rowvalues && (idx || v)) { 1231 /* 1232 allocate enough space to hold information from the longest row. 1233 */ 1234 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1235 PetscInt max = 1,tmp; 1236 for (i=0; i<matin->m; i++) { 1237 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1238 if (max < tmp) { max = tmp; } 1239 } 1240 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1241 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1242 } 1243 1244 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1245 lrow = row - rstart; 1246 1247 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1248 if (!v) {pvA = 0; pvB = 0;} 1249 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1250 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1251 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1252 nztot = nzA + nzB; 1253 1254 cmap = mat->garray; 1255 if (v || idx) { 1256 if (nztot) { 1257 /* Sort by increasing column numbers, assuming A and B already sorted */ 1258 PetscInt imark = -1; 1259 if (v) { 1260 *v = v_p = mat->rowvalues; 1261 for (i=0; i<nzB; i++) { 1262 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1263 else break; 1264 } 1265 imark = i; 1266 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1267 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1268 } 1269 if (idx) { 1270 *idx = idx_p = mat->rowindices; 1271 if (imark > -1) { 1272 for (i=0; i<imark; i++) { 1273 idx_p[i] = cmap[cworkB[i]]; 1274 } 1275 } else { 1276 for (i=0; i<nzB; i++) { 1277 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1278 else break; 1279 } 1280 imark = i; 1281 } 1282 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1283 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1284 } 1285 } else { 1286 if (idx) *idx = 0; 1287 if (v) *v = 0; 1288 } 1289 } 1290 *nz = nztot; 1291 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1292 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1293 PetscFunctionReturn(0); 1294 } 1295 1296 #undef __FUNCT__ 1297 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1298 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1299 { 1300 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1301 1302 PetscFunctionBegin; 1303 if (!aij->getrowactive) { 1304 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1305 } 1306 aij->getrowactive = PETSC_FALSE; 1307 PetscFunctionReturn(0); 1308 } 1309 1310 #undef __FUNCT__ 1311 #define __FUNCT__ "MatNorm_MPIAIJ" 1312 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1313 { 1314 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1315 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1316 PetscErrorCode ierr; 1317 PetscInt i,j,cstart = aij->cstart; 1318 PetscReal sum = 0.0; 1319 PetscScalar *v; 1320 1321 PetscFunctionBegin; 1322 if (aij->size == 1) { 1323 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1324 } else { 1325 if (type == NORM_FROBENIUS) { 1326 v = amat->a; 1327 for (i=0; i<amat->nz; i++) { 1328 #if defined(PETSC_USE_COMPLEX) 1329 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1330 #else 1331 sum 
+= (*v)*(*v); v++; 1332 #endif 1333 } 1334 v = bmat->a; 1335 for (i=0; i<bmat->nz; i++) { 1336 #if defined(PETSC_USE_COMPLEX) 1337 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1338 #else 1339 sum += (*v)*(*v); v++; 1340 #endif 1341 } 1342 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1343 *norm = sqrt(*norm); 1344 } else if (type == NORM_1) { /* max column norm */ 1345 PetscReal *tmp,*tmp2; 1346 PetscInt *jj,*garray = aij->garray; 1347 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1348 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1349 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1350 *norm = 0.0; 1351 v = amat->a; jj = amat->j; 1352 for (j=0; j<amat->nz; j++) { 1353 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1354 } 1355 v = bmat->a; jj = bmat->j; 1356 for (j=0; j<bmat->nz; j++) { 1357 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1358 } 1359 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1360 for (j=0; j<mat->N; j++) { 1361 if (tmp2[j] > *norm) *norm = tmp2[j]; 1362 } 1363 ierr = PetscFree(tmp);CHKERRQ(ierr); 1364 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1365 } else if (type == NORM_INFINITY) { /* max row norm */ 1366 PetscReal ntemp = 0.0; 1367 for (j=0; j<aij->A->m; j++) { 1368 v = amat->a + amat->i[j]; 1369 sum = 0.0; 1370 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1371 sum += PetscAbsScalar(*v); v++; 1372 } 1373 v = bmat->a + bmat->i[j]; 1374 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1375 sum += PetscAbsScalar(*v); v++; 1376 } 1377 if (sum > ntemp) ntemp = sum; 1378 } 1379 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1380 } else { 1381 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1382 } 1383 } 1384 PetscFunctionReturn(0); 1385 } 1386 1387 #undef __FUNCT__ 1388 #define __FUNCT__ "MatTranspose_MPIAIJ" 1389 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1390 { 1391 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1392 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1393 PetscErrorCode ierr; 1394 PetscInt M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1395 Mat B; 1396 PetscScalar *array; 1397 1398 PetscFunctionBegin; 1399 if (!matout && M != N) { 1400 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1401 } 1402 1403 ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr); 1404 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1405 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1406 1407 /* copy over the A part */ 1408 Aloc = (Mat_SeqAIJ*)a->A->data; 1409 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1410 row = a->rstart; 1411 for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;} 1412 for (i=0; i<m; i++) { 1413 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1414 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1415 } 1416 aj = Aloc->j; 1417 for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;} 1418 1419 /* copy over the B part */ 1420 Aloc = (Mat_SeqAIJ*)a->B->data; 1421 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1422 row = a->rstart; 1423 ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1424 ct = cols; 1425 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1426 for (i=0; i<m; i++) { 1427 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1428 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1429 } 1430 ierr = PetscFree(ct);CHKERRQ(ierr); 1431 ierr = 
MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1432 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1433 if (matout) { 1434 *matout = B; 1435 } else { 1436 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1437 } 1438 PetscFunctionReturn(0); 1439 } 1440 1441 #undef __FUNCT__ 1442 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1443 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1444 { 1445 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1446 Mat a = aij->A,b = aij->B; 1447 PetscErrorCode ierr; 1448 PetscInt s1,s2,s3; 1449 1450 PetscFunctionBegin; 1451 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1452 if (rr) { 1453 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1454 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1455 /* Overlap communication with computation. */ 1456 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1457 } 1458 if (ll) { 1459 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1460 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1461 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1462 } 1463 /* scale the diagonal block */ 1464 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1465 1466 if (rr) { 1467 /* Do a scatter end and then right scale the off-diagonal block */ 1468 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1469 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1470 } 1471 1472 PetscFunctionReturn(0); 1473 } 1474 1475 1476 #undef __FUNCT__ 1477 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1478 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A) 1479 { 1480 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1481 PetscErrorCode ierr; 1482 1483 PetscFunctionBegin; 1484 if (!a->rank) { 1485 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1486 } 1487 PetscFunctionReturn(0); 1488 } 1489 1490 #undef __FUNCT__ 1491 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1492 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1493 { 1494 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1495 PetscErrorCode ierr; 1496 1497 PetscFunctionBegin; 1498 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1499 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1500 PetscFunctionReturn(0); 1501 } 1502 #undef __FUNCT__ 1503 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1504 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1505 { 1506 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1507 PetscErrorCode ierr; 1508 1509 PetscFunctionBegin; 1510 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1511 PetscFunctionReturn(0); 1512 } 1513 1514 #undef __FUNCT__ 1515 #define __FUNCT__ "MatEqual_MPIAIJ" 1516 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1517 { 1518 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1519 Mat a,b,c,d; 1520 PetscTruth flg; 1521 PetscErrorCode ierr; 1522 1523 PetscFunctionBegin; 1524 a = matA->A; b = matA->B; 1525 c = matB->A; d = matB->B; 1526 1527 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1528 if (flg) { 1529 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1530 } 1531 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1532 PetscFunctionReturn(0); 1533 } 1534 1535 #undef __FUNCT__ 1536 #define __FUNCT__ "MatCopy_MPIAIJ" 1537 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1538 { 1539 PetscErrorCode ierr; 1540 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1541 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1542 1543 PetscFunctionBegin; 1544 /* If the two matrices don't have the same copy implementation, 
they aren't compatible for fast copy. */ 1545 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1546 /* because of the column compression in the off-processor part of the matrix a->B, 1547 the number of columns in a->B and b->B may be different, hence we cannot call 1548 the MatCopy() directly on the two parts. If need be, we can provide a more 1549 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1550 then copying the submatrices */ 1551 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1552 } else { 1553 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1554 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1555 } 1556 PetscFunctionReturn(0); 1557 } 1558 1559 #undef __FUNCT__ 1560 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1561 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1562 { 1563 PetscErrorCode ierr; 1564 1565 PetscFunctionBegin; 1566 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1567 PetscFunctionReturn(0); 1568 } 1569 1570 #include "petscblaslapack.h" 1571 #undef __FUNCT__ 1572 #define __FUNCT__ "MatAXPY_MPIAIJ" 1573 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str) 1574 { 1575 PetscErrorCode ierr; 1576 PetscInt i; 1577 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1578 PetscBLASInt bnz,one=1; 1579 Mat_SeqAIJ *x,*y; 1580 1581 PetscFunctionBegin; 1582 if (str == SAME_NONZERO_PATTERN) { 1583 x = (Mat_SeqAIJ *)xx->A->data; 1584 y = (Mat_SeqAIJ *)yy->A->data; 1585 bnz = (PetscBLASInt)x->nz; 1586 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1587 x = (Mat_SeqAIJ *)xx->B->data; 1588 y = (Mat_SeqAIJ *)yy->B->data; 1589 bnz = (PetscBLASInt)x->nz; 1590 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1591 } else if (str == SUBSET_NONZERO_PATTERN) { 1592 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1593 1594 x = (Mat_SeqAIJ *)xx->B->data; 1595 y = (Mat_SeqAIJ *)yy->B->data; 1596 if (y->xtoy && y->XtoY != xx->B) { 1597 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1598 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1599 } 1600 if (!y->xtoy) { /* get xtoy */ 1601 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1602 y->XtoY = xx->B; 1603 } 1604 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1605 } else { 1606 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1607 } 1608 PetscFunctionReturn(0); 1609 } 1610 1611 /* -------------------------------------------------------------------*/ 1612 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1613 MatGetRow_MPIAIJ, 1614 MatRestoreRow_MPIAIJ, 1615 MatMult_MPIAIJ, 1616 /* 4*/ MatMultAdd_MPIAIJ, 1617 MatMultTranspose_MPIAIJ, 1618 MatMultTransposeAdd_MPIAIJ, 1619 0, 1620 0, 1621 0, 1622 /*10*/ 0, 1623 0, 1624 0, 1625 MatRelax_MPIAIJ, 1626 MatTranspose_MPIAIJ, 1627 /*15*/ MatGetInfo_MPIAIJ, 1628 MatEqual_MPIAIJ, 1629 MatGetDiagonal_MPIAIJ, 1630 MatDiagonalScale_MPIAIJ, 1631 MatNorm_MPIAIJ, 1632 /*20*/ MatAssemblyBegin_MPIAIJ, 1633 MatAssemblyEnd_MPIAIJ, 1634 0, 1635 MatSetOption_MPIAIJ, 1636 MatZeroEntries_MPIAIJ, 1637 /*25*/ MatZeroRows_MPIAIJ, 1638 0, 1639 0, 1640 0, 1641 0, 1642 /*30*/ MatSetUpPreallocation_MPIAIJ, 1643 0, 1644 0, 1645 0, 1646 0, 1647 /*35*/ MatDuplicate_MPIAIJ, 1648 0, 1649 0, 1650 0, 1651 0, 1652 /*40*/ MatAXPY_MPIAIJ, 1653 MatGetSubMatrices_MPIAIJ, 1654 MatIncreaseOverlap_MPIAIJ, 1655 MatGetValues_MPIAIJ, 1656 MatCopy_MPIAIJ, 1657 /*45*/ MatPrintHelp_MPIAIJ, 1658 MatScale_MPIAIJ, 1659 0, 1660 0, 1661 0, 
1662 /*50*/ MatSetBlockSize_MPIAIJ, 1663 0, 1664 0, 1665 0, 1666 0, 1667 /*55*/ MatFDColoringCreate_MPIAIJ, 1668 0, 1669 MatSetUnfactored_MPIAIJ, 1670 0, 1671 0, 1672 /*60*/ MatGetSubMatrix_MPIAIJ, 1673 MatDestroy_MPIAIJ, 1674 MatView_MPIAIJ, 1675 MatGetPetscMaps_Petsc, 1676 0, 1677 /*65*/ 0, 1678 0, 1679 0, 1680 0, 1681 0, 1682 /*70*/ 0, 1683 0, 1684 MatSetColoring_MPIAIJ, 1685 #if defined(PETSC_HAVE_ADIC) 1686 MatSetValuesAdic_MPIAIJ, 1687 #else 1688 0, 1689 #endif 1690 MatSetValuesAdifor_MPIAIJ, 1691 /*75*/ 0, 1692 0, 1693 0, 1694 0, 1695 0, 1696 /*80*/ 0, 1697 0, 1698 0, 1699 0, 1700 /*84*/ MatLoad_MPIAIJ, 1701 0, 1702 0, 1703 0, 1704 0, 1705 0, 1706 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1707 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1708 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1709 MatPtAP_MPIAIJ_MPIAIJ, 1710 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1711 /*95*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 1712 0, 1713 0, 1714 0}; 1715 1716 /* ----------------------------------------------------------------------------------------*/ 1717 1718 EXTERN_C_BEGIN 1719 #undef __FUNCT__ 1720 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1721 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 1722 { 1723 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1724 PetscErrorCode ierr; 1725 1726 PetscFunctionBegin; 1727 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1728 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1729 PetscFunctionReturn(0); 1730 } 1731 EXTERN_C_END 1732 1733 EXTERN_C_BEGIN 1734 #undef __FUNCT__ 1735 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1736 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 1737 { 1738 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1739 PetscErrorCode ierr; 1740 1741 PetscFunctionBegin; 1742 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1743 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1744 PetscFunctionReturn(0); 1745 } 1746 EXTERN_C_END 1747 1748 #include "petscpc.h" 1749 EXTERN_C_BEGIN 1750 #undef __FUNCT__ 1751 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1752 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1753 { 1754 Mat_MPIAIJ *b; 1755 PetscErrorCode ierr; 1756 PetscInt i; 1757 1758 PetscFunctionBegin; 1759 B->preallocated = PETSC_TRUE; 1760 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1761 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1762 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1763 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1764 if (d_nnz) { 1765 for (i=0; i<B->m; i++) { 1766 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1767 } 1768 } 1769 if (o_nnz) { 1770 for (i=0; i<B->m; i++) { 1771 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1772 } 1773 } 1774 b = (Mat_MPIAIJ*)B->data; 1775 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1776 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1777 1778 PetscFunctionReturn(0); 1779 } 1780 EXTERN_C_END 1781 1782 /*MC 1783 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 1784 1785 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 1786 and MATMPIAIJ otherwise. 
As a result, for single process communicators, 1787 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 1788 for communicators controlling multiple processes. It is recommended that you call both of 1789 the above preallocation routines for simplicity. 1790 1791 Options Database Keys: 1792 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 1793 1794 Level: beginner 1795 1796 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ 1797 M*/ 1798 1799 EXTERN_C_BEGIN 1800 #undef __FUNCT__ 1801 #define __FUNCT__ "MatCreate_AIJ" 1802 PetscErrorCode MatCreate_AIJ(Mat A) 1803 { 1804 PetscErrorCode ierr; 1805 PetscMPIInt size; 1806 1807 PetscFunctionBegin; 1808 ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr); 1809 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1810 if (size == 1) { 1811 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 1812 } else { 1813 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1814 } 1815 PetscFunctionReturn(0); 1816 } 1817 EXTERN_C_END 1818 1819 #undef __FUNCT__ 1820 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1821 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1822 { 1823 Mat mat; 1824 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1825 PetscErrorCode ierr; 1826 1827 PetscFunctionBegin; 1828 *newmat = 0; 1829 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1830 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1831 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1832 a = (Mat_MPIAIJ*)mat->data; 1833 1834 mat->factor = matin->factor; 1835 mat->bs = matin->bs; 1836 mat->assembled = PETSC_TRUE; 1837 mat->insertmode = NOT_SET_VALUES; 1838 mat->preallocated = PETSC_TRUE; 1839 1840 a->rstart = oldmat->rstart; 1841 a->rend = oldmat->rend; 1842 a->cstart = oldmat->cstart; 1843 a->cend = oldmat->cend; 1844 a->size = oldmat->size; 1845 a->rank = oldmat->rank; 1846 a->donotstash = oldmat->donotstash; 1847 a->roworiented = oldmat->roworiented; 1848 a->rowindices = 0; 1849 a->rowvalues = 0; 1850 a->getrowactive = PETSC_FALSE; 1851 1852 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 1853 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1854 if (oldmat->colmap) { 1855 #if defined (PETSC_USE_CTABLE) 1856 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1857 #else 1858 ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1859 PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt)); 1860 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1861 #endif 1862 } else a->colmap = 0; 1863 if (oldmat->garray) { 1864 PetscInt len; 1865 len = oldmat->B->n; 1866 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1867 PetscLogObjectMemory(mat,len*sizeof(PetscInt)); 1868 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 1869 } else a->garray = 0; 1870 1871 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1872 PetscLogObjectParent(mat,a->lvec); 1873 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1874 PetscLogObjectParent(mat,a->Mvctx); 1875 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1876 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1877 PetscLogObjectParent(mat,a->A); 1878 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1879 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 
1880 PetscLogObjectParent(mat,a->B); 1881 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1882 *newmat = mat; 1883 PetscFunctionReturn(0); 1884 } 1885 1886 #include "petscsys.h" 1887 1888 #undef __FUNCT__ 1889 #define __FUNCT__ "MatLoad_MPIAIJ" 1890 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 1891 { 1892 Mat A; 1893 PetscScalar *vals,*svals; 1894 MPI_Comm comm = ((PetscObject)viewer)->comm; 1895 MPI_Status status; 1896 PetscErrorCode ierr; 1897 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 1898 PetscInt i,nz,j,rstart,rend; 1899 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 1900 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1901 PetscInt cend,cstart,n,*rowners; 1902 int fd; 1903 1904 PetscFunctionBegin; 1905 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1906 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1907 if (!rank) { 1908 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1909 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1910 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1911 } 1912 1913 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1914 M = header[1]; N = header[2]; 1915 /* determine ownership of all rows */ 1916 m = M/size + ((M % size) > rank); 1917 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1918 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1919 rowners[0] = 0; 1920 for (i=2; i<=size; i++) { 1921 rowners[i] += rowners[i-1]; 1922 } 1923 rstart = rowners[rank]; 1924 rend = rowners[rank+1]; 1925 1926 /* distribute row lengths to all processors */ 1927 ierr = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr); 1928 if (!rank) { 1929 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 1930 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1931 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 1932 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1933 for (j=0; j<m; j++) { 1934 procsnz[0] += ourlens[j]; 1935 } 1936 for (i=1; i<size; i++) { 1937 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 1938 /* calculate the number of nonzeros on each processor */ 1939 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 1940 procsnz[i] += rowlengths[j]; 1941 } 1942 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1943 } 1944 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1945 } else { 1946 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1947 } 1948 1949 if (!rank) { 1950 /* determine max buffer needed and allocate it */ 1951 maxnz = 0; 1952 for (i=0; i<size; i++) { 1953 maxnz = PetscMax(maxnz,procsnz[i]); 1954 } 1955 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1956 1957 /* read in my part of the matrix column indices */ 1958 nz = procsnz[0]; 1959 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1960 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1961 1962 /* read in every one elses and ship off */ 1963 for (i=1; i<size; i++) { 1964 nz = procsnz[i]; 1965 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1966 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1967 } 1968 ierr = PetscFree(cols);CHKERRQ(ierr); 1969 } else { 1970 /* determine buffer space needed for message */ 1971 nz = 0; 1972 for (i=0; i<m; i++) { 1973 nz += 
ourlens[i]; 1974 } 1975 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1976 1977 /* receive message of column indices*/ 1978 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1979 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1980 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1981 } 1982 1983 /* determine column ownership if matrix is not square */ 1984 if (N != M) { 1985 n = N/size + ((N % size) > rank); 1986 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1987 cstart = cend - n; 1988 } else { 1989 cstart = rstart; 1990 cend = rend; 1991 n = cend - cstart; 1992 } 1993 1994 /* loop over local rows, determining number of off diagonal entries */ 1995 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 1996 jj = 0; 1997 for (i=0; i<m; i++) { 1998 for (j=0; j<ourlens[i]; j++) { 1999 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2000 jj++; 2001 } 2002 } 2003 2004 /* create our matrix */ 2005 for (i=0; i<m; i++) { 2006 ourlens[i] -= offlens[i]; 2007 } 2008 ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr); 2009 ierr = MatSetType(A,type);CHKERRQ(ierr); 2010 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2011 2012 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2013 for (i=0; i<m; i++) { 2014 ourlens[i] += offlens[i]; 2015 } 2016 2017 if (!rank) { 2018 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2019 2020 /* read in my part of the matrix numerical values */ 2021 nz = procsnz[0]; 2022 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2023 2024 /* insert into matrix */ 2025 jj = rstart; 2026 smycols = mycols; 2027 svals = vals; 2028 for (i=0; i<m; i++) { 2029 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2030 smycols += ourlens[i]; 2031 svals += ourlens[i]; 2032 jj++; 2033 } 2034 2035 /* read in other processors and ship out */ 2036 for (i=1; i<size; i++) { 2037 nz = procsnz[i]; 2038 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2039 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2040 } 2041 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2042 } else { 2043 /* receive numeric values */ 2044 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2045 2046 /* receive message of values*/ 2047 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2048 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2049 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2050 2051 /* insert into matrix */ 2052 jj = rstart; 2053 smycols = mycols; 2054 svals = vals; 2055 for (i=0; i<m; i++) { 2056 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2057 smycols += ourlens[i]; 2058 svals += ourlens[i]; 2059 jj++; 2060 } 2061 } 2062 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2063 ierr = PetscFree(vals);CHKERRQ(ierr); 2064 ierr = PetscFree(mycols);CHKERRQ(ierr); 2065 ierr = PetscFree(rowners);CHKERRQ(ierr); 2066 2067 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2068 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2069 *newmat = A; 2070 PetscFunctionReturn(0); 2071 } 2072 2073 #undef __FUNCT__ 2074 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2075 /* 2076 Not great since it makes two copies of the submatrix, first an SeqAIJ 2077 in local and then by concatenating the local matrices the end result. 
2078 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2079 */ 2080 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2081 { 2082 PetscErrorCode ierr; 2083 PetscMPIInt rank,size; 2084 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2085 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2086 Mat *local,M,Mreuse; 2087 PetscScalar *vwork,*aa; 2088 MPI_Comm comm = mat->comm; 2089 Mat_SeqAIJ *aij; 2090 2091 2092 PetscFunctionBegin; 2093 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2094 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2095 2096 if (call == MAT_REUSE_MATRIX) { 2097 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2098 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2099 local = &Mreuse; 2100 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2101 } else { 2102 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2103 Mreuse = *local; 2104 ierr = PetscFree(local);CHKERRQ(ierr); 2105 } 2106 2107 /* 2108 m - number of local rows 2109 n - number of columns (same on all processors) 2110 rstart - first row in new global matrix generated 2111 */ 2112 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2113 if (call == MAT_INITIAL_MATRIX) { 2114 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2115 ii = aij->i; 2116 jj = aij->j; 2117 2118 /* 2119 Determine the number of non-zeros in the diagonal and off-diagonal 2120 portions of the matrix in order to do correct preallocation 2121 */ 2122 2123 /* first get start and end of "diagonal" columns */ 2124 if (csize == PETSC_DECIDE) { 2125 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2126 if (mglobal == n) { /* square matrix */ 2127 nlocal = m; 2128 } else { 2129 nlocal = n/size + ((n % size) > rank); 2130 } 2131 } else { 2132 nlocal = csize; 2133 } 2134 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2135 rstart = rend - nlocal; 2136 if (rank == size - 1 && rend != n) { 2137 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2138 } 2139 2140 /* next, compute all the lengths */ 2141 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2142 olens = dlens + m; 2143 for (i=0; i<m; i++) { 2144 jend = ii[i+1] - ii[i]; 2145 olen = 0; 2146 dlen = 0; 2147 for (j=0; j<jend; j++) { 2148 if (*jj < rstart || *jj >= rend) olen++; 2149 else dlen++; 2150 jj++; 2151 } 2152 olens[i] = olen; 2153 dlens[i] = dlen; 2154 } 2155 ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr); 2156 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2157 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2158 ierr = PetscFree(dlens);CHKERRQ(ierr); 2159 } else { 2160 PetscInt ml,nl; 2161 2162 M = *newmat; 2163 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2164 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2165 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2166 /* 2167 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2168 rather than the slower MatSetValues(). 
2169 */ 2170 M->was_assembled = PETSC_TRUE; 2171 M->assembled = PETSC_FALSE; 2172 } 2173 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2174 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2175 ii = aij->i; 2176 jj = aij->j; 2177 aa = aij->a; 2178 for (i=0; i<m; i++) { 2179 row = rstart + i; 2180 nz = ii[i+1] - ii[i]; 2181 cwork = jj; jj += nz; 2182 vwork = aa; aa += nz; 2183 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2184 } 2185 2186 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2187 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2188 *newmat = M; 2189 2190 /* save submatrix used in processor for next request */ 2191 if (call == MAT_INITIAL_MATRIX) { 2192 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2193 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2194 } 2195 2196 PetscFunctionReturn(0); 2197 } 2198 2199 EXTERN_C_BEGIN 2200 #undef __FUNCT__ 2201 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2202 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2203 { 2204 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2205 PetscInt m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d; 2206 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii; 2207 const PetscInt *JJ; 2208 PetscScalar *values; 2209 PetscErrorCode ierr; 2210 2211 PetscFunctionBegin; 2212 #if defined(PETSC_OPT_g) 2213 if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]); 2214 #endif 2215 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2216 o_nnz = d_nnz + m; 2217 2218 for (i=0; i<m; i++) { 2219 nnz = I[i+1]- I[i]; 2220 JJ = J + I[i]; 2221 nnz_max = PetscMax(nnz_max,nnz); 2222 #if defined(PETSC_OPT_g) 2223 if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 2224 #endif 2225 for (j=0; j<nnz; j++) { 2226 if (*JJ >= cstart) break; 2227 JJ++; 2228 } 2229 d = 0; 2230 for (; j<nnz; j++) { 2231 if (*JJ++ >= cend) break; 2232 d++; 2233 } 2234 d_nnz[i] = d; 2235 o_nnz[i] = nnz - d; 2236 } 2237 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2238 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2239 2240 if (v) values = (PetscScalar*)v; 2241 else { 2242 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2243 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2244 } 2245 2246 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2247 for (i=0; i<m; i++) { 2248 ii = i + rstart; 2249 nnz = I[i+1]- I[i]; 2250 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr); 2251 } 2252 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2253 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2254 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2255 2256 if (!v) { 2257 ierr = PetscFree(values);CHKERRQ(ierr); 2258 } 2259 PetscFunctionReturn(0); 2260 } 2261 EXTERN_C_END 2262 2263 #undef __FUNCT__ 2264 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2265 /*@C 2266 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2267 (the default parallel PETSc format). 2268 2269 Collective on MPI_Comm 2270 2271 Input Parameters: 2272 + A - the matrix 2273 . i - the indices into j for the start of each local row (starts with zero) 2274 . 
j - the column indices for each local row (starts with zero); these must be sorted within each row
2275 - v - optional values in the matrix
2276
2277   Level: developer
2278
2279 .keywords: matrix, aij, compressed row, sparse, parallel
2280
2281 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2282 @*/
2283 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2284 {
2285   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2286
2287   PetscFunctionBegin;
2288   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2289   if (f) {
2290     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2291   }
2292   PetscFunctionReturn(0);
2293 }
2294
2295 #undef __FUNCT__
2296 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2297 /*@C
2298    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2299    (the default parallel PETSc format). For good matrix assembly performance
2300    the user should preallocate the matrix storage by setting the parameters
2301    d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2302    performance can be increased by more than a factor of 50.
2303
2304    Collective on MPI_Comm
2305
2306    Input Parameters:
2307 +  A - the matrix
2308 .  d_nz - number of nonzeros per row in the DIAGONAL portion of the local submatrix
2309            (the same value is used for all local rows)
2310 .  d_nnz - array containing the number of nonzeros in the various rows of the
2311            DIAGONAL portion of the local submatrix (possibly different for each row)
2312            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2313            The size of this array is equal to the number of local rows, i.e. 'm'.
2314            You must leave room for the diagonal entry even if it is zero.
2315 .  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of the local
2316            submatrix (the same value is used for all local rows).
2317 -  o_nnz - array containing the number of nonzeros in the various rows of the
2318            OFF-DIAGONAL portion of the local submatrix (possibly different for
2319            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2320            structure. The size of this array is equal to the number
2321            of local rows, i.e. 'm'.
2322
2323    If a *_nnz parameter is given then the corresponding *_nz parameter is ignored.
2324
2325    The AIJ format (also called the Yale sparse matrix format or
2326    compressed row storage (CSR)) is fully compatible with standard Fortran 77
2327    storage. The stored row and column indices begin with zero. See the users manual for details.
2328
2329    The parallel matrix is partitioned such that the first m0 rows belong to
2330    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2331    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2332
2333    The DIAGONAL portion of the local submatrix of a processor can be defined
2334    as the submatrix obtained by extracting the part corresponding
2335    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2336    first row that belongs to the processor and r2 is the last row belonging
2337    to this processor. This is a square mxm matrix. The remaining portion
2338    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2339
2340    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
2341
2342    Example usage:
2343
2344    Consider the following 8x8 matrix with 34 nonzero values that is
2345    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2346    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
2347    as follows:
2348
2349 .vb
2350             1  2  0  |  0  3  0  |  0  4
2351    Proc0    0  5  6  |  7  0  0  |  8  0
2352             9  0 10  | 11  0  0  | 12  0
2353    -------------------------------------
2354            13  0 14  | 15 16 17  |  0  0
2355    Proc1    0 18  0  | 19 20 21  |  0  0
2356             0  0  0  | 22 23  0  | 24  0
2357    -------------------------------------
2358    Proc2   25 26 27  |  0  0 28  | 29  0
2359            30  0  0  | 31 32 33  |  0 34
2360 .ve
2361
2362    This can be represented as a collection of submatrices as:
2363
2364 .vb
2365       A B C
2366       D E F
2367       G H I
2368 .ve
2369
2370    Here the submatrices A,B,C are owned by proc0, D,E,F are
2371    owned by proc1, and G,H,I are owned by proc2.
2372
2373    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2374    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2375    The 'M','N' parameters are 8,8, and have the same values on all procs.
2376
2377    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2378    the submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2379    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2380    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2381    part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
2382    matrix and [DF] as another SeqAIJ matrix.
2383
2384    When the d_nz, o_nz parameters are specified, d_nz storage elements are
2385    allocated for every row of the local diagonal submatrix, and o_nz
2386    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2387    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2388    local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2389    In this case, the values of d_nz,o_nz are:
2390 .vb
2391      proc0 : d_nz = 2, o_nz = 2
2392      proc1 : d_nz = 3, o_nz = 2
2393      proc2 : d_nz = 1, o_nz = 4
2394 .ve
2395    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2396    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2397    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2398    34 values.
2399
2400    When the d_nnz, o_nnz parameters are specified, the storage is specified
2401    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2402    In the above case the values for d_nnz,o_nnz are:
2403 .vb
2404      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2405      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2406      proc2: d_nnz = [1,1] and o_nnz = [4,4]
2407 .ve
2408    Here the space allocated is the sum of all the above values, i.e. 34, and
2409    hence the preallocation is exact.
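
   As a minimal calling sketch for proc0 of the example above (assuming a
   3-process communicator comm and the error-checking style used throughout
   this file), the preallocation call sequence would look like:
.vb
      Mat            A;
      PetscErrorCode ierr;
      PetscInt       d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
      ierr = MatCreate(comm,3,3,8,8,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
      (set entries with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd())
.ve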
2410 2411 Level: intermediate 2412 2413 .keywords: matrix, aij, compressed row, sparse, parallel 2414 2415 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 2416 MPIAIJ 2417 @*/ 2418 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2419 { 2420 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2421 2422 PetscFunctionBegin; 2423 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2424 if (f) { 2425 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2426 } 2427 PetscFunctionReturn(0); 2428 } 2429 2430 #undef __FUNCT__ 2431 #define __FUNCT__ "MatCreateMPIAIJ" 2432 /*@C 2433 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2434 (the default parallel PETSc format). For good matrix assembly performance 2435 the user should preallocate the matrix storage by setting the parameters 2436 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2437 performance can be increased by more than a factor of 50. 2438 2439 Collective on MPI_Comm 2440 2441 Input Parameters: 2442 + comm - MPI communicator 2443 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2444 This value should be the same as the local size used in creating the 2445 y vector for the matrix-vector product y = Ax. 2446 . n - This value should be the same as the local size used in creating the 2447 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2448 calculated if N is given) For square matrices n is almost always m. 2449 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2450 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2451 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2452 (same value is used for all local rows) 2453 . d_nnz - array containing the number of nonzeros in the various rows of the 2454 DIAGONAL portion of the local submatrix (possibly different for each row) 2455 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2456 The size of this array is equal to the number of local rows, i.e 'm'. 2457 You must leave room for the diagonal entry even if it is zero. 2458 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2459 submatrix (same value is used for all local rows). 2460 - o_nnz - array containing the number of nonzeros in the various rows of the 2461 OFF-DIAGONAL portion of the local submatrix (possibly different for 2462 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2463 structure. The size of this array is equal to the number 2464 of local rows, i.e 'm'. 2465 2466 Output Parameter: 2467 . A - the matrix 2468 2469 Notes: 2470 If the *_nnz parameter is given then the *_nz parameter is ignored 2471 2472 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2473 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2474 storage requirements for this matrix. 2475 2476 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2477 processor than it must be used on all processors that share the object for 2478 that argument. 2479 2480 The user MUST specify either the local or global matrix dimensions 2481 (possibly both). 
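
   For instance, a minimal sketch (comm, M, N and A are assumed to be provided
   by the caller) that specifies only the global sizes and lets PETSc choose
   the local decomposition is:
.vb
      ierr = MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
.ve
   Here 5 and 2 mirror the per-row defaults that MatMPIAIJSetPreallocation()
   falls back to when PETSC_DEFAULT is passed for d_nz and o_nz.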
2482
2483    The parallel matrix is partitioned such that the first m0 rows belong to
2484    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2485    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2486
2487    The DIAGONAL portion of the local submatrix of a processor can be defined
2488    as the submatrix obtained by extracting the part corresponding
2489    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2490    first row that belongs to the processor and r2 is the last row belonging
2491    to this processor. This is a square mxm matrix. The remaining portion
2492    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2493
2494    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
2495
2496    When calling this routine with a single process communicator, a matrix of
2497    type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
2498    type of communicator, use the construction mechanism:
2499      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2500
2501    By default, this format uses inodes (identical nodes) when possible.
2502    We search for consecutive rows with the same nonzero structure, thereby
2503    reusing matrix information to achieve increased efficiency.
2504
2505    Options Database Keys:
2506 +  -mat_aij_no_inode  - Do not use inodes
2507 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2508 -  -mat_aij_oneindex - Internally use indexing starting at 1
2509         rather than 0. Note that when calling MatSetValues(),
2510         the user still MUST index entries starting at 0!
2511
2512
2513    Example usage:
2514
2515    Consider the following 8x8 matrix with 34 nonzero values that is
2516    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2517    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
2518    as follows:
2519
2520 .vb
2521             1  2  0  |  0  3  0  |  0  4
2522    Proc0    0  5  6  |  7  0  0  |  8  0
2523             9  0 10  | 11  0  0  | 12  0
2524    -------------------------------------
2525            13  0 14  | 15 16 17  |  0  0
2526    Proc1    0 18  0  | 19 20 21  |  0  0
2527             0  0  0  | 22 23  0  | 24  0
2528    -------------------------------------
2529    Proc2   25 26 27  |  0  0 28  | 29  0
2530            30  0  0  | 31 32 33  |  0 34
2531 .ve
2532
2533    This can be represented as a collection of submatrices as:
2534
2535 .vb
2536       A B C
2537       D E F
2538       G H I
2539 .ve
2540
2541    Here the submatrices A,B,C are owned by proc0, D,E,F are
2542    owned by proc1, and G,H,I are owned by proc2.
2543
2544    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2545    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2546    The 'M','N' parameters are 8,8, and have the same values on all procs.
2547
2548    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2549    the submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2550    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2551    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2552    part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
2553    matrix and [DF] as another SeqAIJ matrix.
2554
2555    When the d_nz, o_nz parameters are specified, d_nz storage elements are
2556    allocated for every row of the local diagonal submatrix, and o_nz
2557    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2558    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2559    local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2560 In this case, the values of d_nz,o_nz are: 2561 .vb 2562 proc0 : dnz = 2, o_nz = 2 2563 proc1 : dnz = 3, o_nz = 2 2564 proc2 : dnz = 1, o_nz = 4 2565 .ve 2566 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2567 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2568 for proc3. i.e we are using 12+15+10=37 storage locations to store 2569 34 values. 2570 2571 When d_nnz, o_nnz parameters are specified, the storage is specified 2572 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2573 In the above case the values for d_nnz,o_nnz are: 2574 .vb 2575 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2576 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2577 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2578 .ve 2579 Here the space allocated is sum of all the above values i.e 34, and 2580 hence pre-allocation is perfect. 2581 2582 Level: intermediate 2583 2584 .keywords: matrix, aij, compressed row, sparse, parallel 2585 2586 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2587 MPIAIJ 2588 @*/ 2589 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2590 { 2591 PetscErrorCode ierr; 2592 PetscMPIInt size; 2593 2594 PetscFunctionBegin; 2595 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2596 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2597 if (size > 1) { 2598 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2599 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2600 } else { 2601 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2602 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2603 } 2604 PetscFunctionReturn(0); 2605 } 2606 2607 #undef __FUNCT__ 2608 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2609 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2610 { 2611 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2612 2613 PetscFunctionBegin; 2614 *Ad = a->A; 2615 *Ao = a->B; 2616 *colmap = a->garray; 2617 PetscFunctionReturn(0); 2618 } 2619 2620 #undef __FUNCT__ 2621 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2622 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2623 { 2624 PetscErrorCode ierr; 2625 PetscInt i; 2626 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2627 2628 PetscFunctionBegin; 2629 if (coloring->ctype == IS_COLORING_LOCAL) { 2630 ISColoringValue *allcolors,*colors; 2631 ISColoring ocoloring; 2632 2633 /* set coloring for diagonal portion */ 2634 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2635 2636 /* set coloring for off-diagonal portion */ 2637 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2638 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2639 for (i=0; i<a->B->n; i++) { 2640 colors[i] = allcolors[a->garray[i]]; 2641 } 2642 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2643 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2644 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2645 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2646 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2647 ISColoringValue *colors; 2648 PetscInt *larray; 2649 ISColoring ocoloring; 2650 2651 /* set coloring for diagonal portion */ 2652 ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2653 for (i=0; i<a->A->n; i++) { 2654 larray[i] = i + 
a->cstart; 2655 } 2656 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2657 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2658 for (i=0; i<a->A->n; i++) { 2659 colors[i] = coloring->colors[larray[i]]; 2660 } 2661 ierr = PetscFree(larray);CHKERRQ(ierr); 2662 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2663 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2664 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2665 2666 /* set coloring for off-diagonal portion */ 2667 ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2668 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2669 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2670 for (i=0; i<a->B->n; i++) { 2671 colors[i] = coloring->colors[larray[i]]; 2672 } 2673 ierr = PetscFree(larray);CHKERRQ(ierr); 2674 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2675 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2676 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2677 } else { 2678 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 2679 } 2680 2681 PetscFunctionReturn(0); 2682 } 2683 2684 #if defined(PETSC_HAVE_ADIC) 2685 #undef __FUNCT__ 2686 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2687 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2688 { 2689 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2690 PetscErrorCode ierr; 2691 2692 PetscFunctionBegin; 2693 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2694 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2695 PetscFunctionReturn(0); 2696 } 2697 #endif 2698 2699 #undef __FUNCT__ 2700 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2701 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 2702 { 2703 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2704 PetscErrorCode ierr; 2705 2706 PetscFunctionBegin; 2707 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2708 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2709 PetscFunctionReturn(0); 2710 } 2711 2712 #undef __FUNCT__ 2713 #define __FUNCT__ "MatMerge" 2714 /*@C 2715 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2716 matrices from each processor 2717 2718 Collective on MPI_Comm 2719 2720 Input Parameters: 2721 + comm - the communicators the parallel matrix will live on 2722 . inmat - the input sequential matrices 2723 . n - number of local columns (or PETSC_DECIDE) 2724 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2725 2726 Output Parameter: 2727 . outmat - the parallel matrix generated 2728 2729 Level: advanced 2730 2731 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
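
   Example usage (a sketch; Aloc is assumed to be a SeqAIJ matrix that has
   already been assembled on each process, all with the same number of columns):
.vb
      Mat C;
      ierr = MatMerge(PETSC_COMM_WORLD,Aloc,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
.ve
   Note that MatMerge() destroys the input matrix inmat (Aloc above) once the
   parallel matrix has been assembled.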
2732 2733 @*/ 2734 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2735 { 2736 PetscErrorCode ierr; 2737 PetscInt m,N,i,rstart,nnz,I,*dnz,*onz; 2738 PetscInt *indx; 2739 PetscScalar *values; 2740 PetscMap columnmap,rowmap; 2741 2742 PetscFunctionBegin; 2743 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2744 /* 2745 PetscMPIInt rank; 2746 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2747 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N); 2748 */ 2749 if (scall == MAT_INITIAL_MATRIX){ 2750 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2751 if (n == PETSC_DECIDE){ 2752 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2753 ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr); 2754 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2755 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2756 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2757 } 2758 2759 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2760 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2761 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2762 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2763 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2764 2765 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2766 for (i=0;i<m;i++) { 2767 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2768 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2769 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2770 } 2771 /* This routine will ONLY return MPIAIJ type matrix */ 2772 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr); 2773 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2774 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2775 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2776 2777 } else if (scall == MAT_REUSE_MATRIX){ 2778 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2779 } else { 2780 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 2781 } 2782 2783 for (i=0;i<m;i++) { 2784 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2785 I = i + rstart; 2786 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2787 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2788 } 2789 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2790 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2791 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2792 2793 PetscFunctionReturn(0); 2794 } 2795 2796 #undef __FUNCT__ 2797 #define __FUNCT__ "MatFileSplit" 2798 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2799 { 2800 PetscErrorCode ierr; 2801 PetscMPIInt rank; 2802 PetscInt m,N,i,rstart,nnz; 2803 size_t len; 2804 const PetscInt *indx; 2805 PetscViewer out; 2806 char *name; 2807 Mat B; 2808 const PetscScalar *values; 2809 2810 PetscFunctionBegin; 2811 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2812 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2813 /* Should this be the type of the diagonal block of A? 
*/ 2814 ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr); 2815 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2816 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2817 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2818 for (i=0;i<m;i++) { 2819 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2820 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2821 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2822 } 2823 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2824 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2825 2826 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2827 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2828 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2829 sprintf(name,"%s.%d",outfile,rank); 2830 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr); 2831 ierr = PetscFree(name); 2832 ierr = MatView(B,out);CHKERRQ(ierr); 2833 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2834 ierr = MatDestroy(B);CHKERRQ(ierr); 2835 PetscFunctionReturn(0); 2836 } 2837 2838 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 2839 #undef __FUNCT__ 2840 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 2841 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 2842 { 2843 PetscErrorCode ierr; 2844 Mat_Merge_SeqsToMPI *merge; 2845 PetscObjectContainer container; 2846 2847 PetscFunctionBegin; 2848 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2849 if (container) { 2850 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2851 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 2852 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 2853 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 2854 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 2855 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 2856 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 2857 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 2858 ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr); 2859 if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);} 2860 if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);} 2861 if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);} 2862 2863 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 2864 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 2865 } 2866 ierr = PetscFree(merge);CHKERRQ(ierr); 2867 2868 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 2869 PetscFunctionReturn(0); 2870 } 2871 2872 #include "src/mat/utils/freespace.h" 2873 #include "petscbt.h" 2874 #undef __FUNCT__ 2875 #define __FUNCT__ "MatMerge_SeqsToMPI" 2876 /*@C 2877 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 2878 matrices from each processor 2879 2880 Collective on MPI_Comm 2881 2882 Input Parameters: 2883 + comm - the communicators the parallel matrix will live on 2884 . seqmat - the input sequential matrices 2885 . m - number of local rows (or PETSC_DECIDE) 2886 . n - number of local columns (or PETSC_DECIDE) 2887 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2888 2889 Output Parameter: 2890 . mpimat - the parallel matrix generated 2891 2892 Level: advanced 2893 2894 Notes: 2895 The dimensions of the sequential matrix in each processor MUST be the same. 2896 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 2897 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 
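
   The routine is implemented below as a symbolic/numeric pair,
   MatMerge_SeqsToMPISymbolic() and MatMerge_SeqsToMPINumeric(). A sketch of the
   intended two-phase usage (seqmat is this process's local SeqAIJ contribution,
   m and n as described above):
.vb
      Mat mpimat;
      ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&mpimat);CHKERRQ(ierr);
      ierr = MatMerge_SeqsToMPINumeric(seqmat,mpimat);CHKERRQ(ierr);
.ve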
2898 @*/ 2899 static PetscEvent logkey_seqstompinum = 0; 2900 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 2901 { 2902 PetscErrorCode ierr; 2903 MPI_Comm comm=mpimat->comm; 2904 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2905 PetscMPIInt size,rank,taga,*len_s; 2906 PetscInt N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j; 2907 PetscInt proc,m; 2908 PetscInt **buf_ri,**buf_rj; 2909 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 2910 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 2911 MPI_Request *s_waits,*r_waits; 2912 MPI_Status *status; 2913 MatScalar *aa=a->a,**abuf_r,*ba_i; 2914 Mat_Merge_SeqsToMPI *merge; 2915 PetscObjectContainer container; 2916 2917 PetscFunctionBegin; 2918 if (!logkey_seqstompinum) { 2919 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 2920 } 2921 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 2922 2923 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2924 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2925 2926 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2927 if (container) { 2928 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2929 } 2930 bi = merge->bi; 2931 bj = merge->bj; 2932 buf_ri = merge->buf_ri; 2933 buf_rj = merge->buf_rj; 2934 2935 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 2936 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 2937 len_s = merge->len_s; 2938 2939 /* send and recv matrix values */ 2940 /*-----------------------------*/ 2941 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 2942 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 2943 2944 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 2945 for (proc=0,k=0; proc<size; proc++){ 2946 if (!len_s[proc]) continue; 2947 i = owners[proc]; 2948 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 2949 k++; 2950 } 2951 2952 ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr); 2953 ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr); 2954 ierr = PetscFree(status);CHKERRQ(ierr); 2955 2956 ierr = PetscFree(s_waits);CHKERRQ(ierr); 2957 ierr = PetscFree(r_waits);CHKERRQ(ierr); 2958 2959 /* insert mat values of mpimat */ 2960 /*----------------------------*/ 2961 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 2962 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 2963 nextrow = buf_ri_k + merge->nrecv; 2964 nextai = nextrow + merge->nrecv; 2965 2966 for (k=0; k<merge->nrecv; k++){ 2967 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 2968 nrows = *(buf_ri_k[k]); 2969 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 2970 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 2971 } 2972 2973 /* set values of ba */ 2974 ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); 2975 for (i=0; i<m; i++) { 2976 arow = owners[rank] + i; 2977 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 2978 bnzi = bi[i+1] - bi[i]; 2979 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 2980 2981 /* add local non-zero vals of this proc's seqmat into ba */ 2982 anzi = ai[arow+1] - ai[arow]; 2983 aj = a->j + ai[arow]; 2984 aa = a->a + ai[arow]; 2985 nextaj = 0; 2986 for 
(j=0; nextaj<anzi; j++){ 2987 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 2988 ba_i[j] += aa[nextaj++]; 2989 } 2990 } 2991 2992 /* add received vals into ba */ 2993 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 2994 /* i-th row */ 2995 if (i == *nextrow[k]) { 2996 anzi = *(nextai[k]+1) - *nextai[k]; 2997 aj = buf_rj[k] + *(nextai[k]); 2998 aa = abuf_r[k] + *(nextai[k]); 2999 nextaj = 0; 3000 for (j=0; nextaj<anzi; j++){ 3001 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3002 ba_i[j] += aa[nextaj++]; 3003 } 3004 } 3005 nextrow[k]++; nextai[k]++; 3006 } 3007 } 3008 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3009 } 3010 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3011 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3012 3013 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3014 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3015 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3016 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3017 PetscFunctionReturn(0); 3018 } 3019 static PetscEvent logkey_seqstompisym = 0; 3020 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3021 { 3022 PetscErrorCode ierr; 3023 Mat B_mpi; 3024 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3025 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3026 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3027 PetscInt M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j; 3028 PetscInt len,proc,*dnz,*onz; 3029 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3030 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3031 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3032 MPI_Status *status; 3033 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3034 PetscBT lnkbt; 3035 Mat_Merge_SeqsToMPI *merge; 3036 PetscObjectContainer container; 3037 3038 PetscFunctionBegin; 3039 if (!logkey_seqstompisym) { 3040 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3041 } 3042 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3043 3044 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3045 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3046 3047 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3048 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3049 3050 /* determine row ownership */ 3051 /*---------------------------------------------------------*/ 3052 ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr); 3053 if (m == PETSC_DECIDE) { 3054 ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr); 3055 } else { 3056 ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 3057 } 3058 ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr); 3059 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3060 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3061 3062 if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); } 3063 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 3064 3065 /* determine the number of messages to send, their lengths */ 3066 /*---------------------------------------------------------*/ 3067 len_s = merge->len_s; 3068 3069 len = 0; /* length of buf_si[] */ 3070 merge->nsend = 0; 3071 for (proc=0; proc<size; proc++){ 3072 len_si[proc] = 0; 3073 if (proc == rank){ 3074 len_s[proc] = 0; 3075 } else { 3076 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3077 
len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3078 } 3079 if (len_s[proc]) { 3080 merge->nsend++; 3081 nrows = 0; 3082 for (i=owners[proc]; i<owners[proc+1]; i++){ 3083 if (ai[i+1] > ai[i]) nrows++; 3084 } 3085 len_si[proc] = 2*(nrows+1); 3086 len += len_si[proc]; 3087 } 3088 } 3089 3090 /* determine the number and length of messages to receive for ij-structure */ 3091 /*-------------------------------------------------------------------------*/ 3092 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3093 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3094 3095 /* post the Irecv of j-structure */ 3096 /*-------------------------------*/ 3097 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr); 3098 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3099 3100 /* post the Isend of j-structure */ 3101 /*--------------------------------*/ 3102 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3103 sj_waits = si_waits + merge->nsend; 3104 3105 for (proc=0, k=0; proc<size; proc++){ 3106 if (!len_s[proc]) continue; 3107 i = owners[proc]; 3108 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3109 k++; 3110 } 3111 3112 /* receives and sends of j-structure are complete */ 3113 /*------------------------------------------------*/ 3114 ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr); 3115 ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr); 3116 3117 /* send and recv i-structure */ 3118 /*---------------------------*/ 3119 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr); 3120 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3121 3122 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3123 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3124 for (proc=0,k=0; proc<size; proc++){ 3125 if (!len_s[proc]) continue; 3126 /* form outgoing message for i-structure: 3127 buf_si[0]: nrows to be sent 3128 [1:nrows]: row index (global) 3129 [nrows+1:2*nrows+1]: i-structure index 3130 */ 3131 /*-------------------------------------------*/ 3132 nrows = len_si[proc]/2 - 1; 3133 buf_si_i = buf_si + nrows+1; 3134 buf_si[0] = nrows; 3135 buf_si_i[0] = 0; 3136 nrows = 0; 3137 for (i=owners[proc]; i<owners[proc+1]; i++){ 3138 anzi = ai[i+1] - ai[i]; 3139 if (anzi) { 3140 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3141 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3142 nrows++; 3143 } 3144 } 3145 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3146 k++; 3147 buf_si += len_si[proc]; 3148 } 3149 3150 ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr); 3151 ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr); 3152 3153 ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 3154 for (i=0; i<merge->nrecv; i++){ 3155 ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 3156 } 3157 3158 ierr = PetscFree(len_si);CHKERRQ(ierr); 3159 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3160 ierr = 
PetscFree(rj_waits);CHKERRQ(ierr); 3161 ierr = PetscFree(si_waits);CHKERRQ(ierr); 3162 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3163 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3164 ierr = PetscFree(status);CHKERRQ(ierr); 3165 3166 /* compute a local seq matrix on each processor */ 3167 /*----------------------------------------------*/ 3168 /* allocate bi array and free space for accumulating nonzero column info */ 3169 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3170 bi[0] = 0; 3171 3172 /* create and initialize a linked list */ 3173 nlnk = N+1; 3174 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3175 3176 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3177 len = 0; 3178 len = ai[owners[rank+1]] - ai[owners[rank]]; 3179 ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3180 current_space = free_space; 3181 3182 /* determine symbolic info for each local row */ 3183 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3184 nextrow = buf_ri_k + merge->nrecv; 3185 nextai = nextrow + merge->nrecv; 3186 for (k=0; k<merge->nrecv; k++){ 3187 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3188 nrows = *buf_ri_k[k]; 3189 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 3190 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure */ 3191 } 3192 3193 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3194 len = 0; 3195 for (i=0;i<m;i++) { 3196 bnzi = 0; 3197 /* add local non-zero cols of this proc's seqmat into lnk */ 3198 arow = owners[rank] + i; 3199 anzi = ai[arow+1] - ai[arow]; 3200 aj = a->j + ai[arow]; 3201 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3202 bnzi += nlnk; 3203 /* add received col data into lnk */ 3204 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3205 if (i == *nextrow[k]) { /* i-th row */ 3206 anzi = *(nextai[k]+1) - *nextai[k]; 3207 aj = buf_rj[k] + *nextai[k]; 3208 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3209 bnzi += nlnk; 3210 nextrow[k]++; nextai[k]++; 3211 } 3212 } 3213 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3214 3215 /* if free space is not available, make more free space */ 3216 if (current_space->local_remaining<bnzi) { 3217 ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr); 3218 nspacedouble++; 3219 } 3220 /* copy data into free space, then initialize lnk */ 3221 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3222 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3223 3224 current_space->array += bnzi; 3225 current_space->local_used += bnzi; 3226 current_space->local_remaining -= bnzi; 3227 3228 bi[i+1] = bi[i] + bnzi; 3229 } 3230 3231 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3232 3233 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3234 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3235 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3236 3237 /* create symbolic parallel matrix B_mpi */ 3238 /*---------------------------------------*/ 3239 if (n==PETSC_DECIDE) { 3240 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr); 3241 } else { 3242 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr); 3243 } 3244 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3245 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3246 ierr =
MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3247 3248 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3249 B_mpi->assembled = PETSC_FALSE; 3250 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3251 merge->bi = bi; 3252 merge->bj = bj; 3253 merge->buf_ri = buf_ri; 3254 merge->buf_rj = buf_rj; 3255 merge->coi = PETSC_NULL; 3256 merge->coj = PETSC_NULL; 3257 merge->owners_co = PETSC_NULL; 3258 3259 /* attach the supporting struct to B_mpi for reuse */ 3260 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3261 ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr); 3262 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3263 *mpimat = B_mpi; 3264 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3265 PetscFunctionReturn(0); 3266 } 3267 3268 static PetscEvent logkey_seqstompi = 0; 3269 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3270 { 3271 PetscErrorCode ierr; 3272 3273 PetscFunctionBegin; 3274 if (!logkey_seqstompi) { 3275 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3276 } 3277 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3278 if (scall == MAT_INITIAL_MATRIX){ 3279 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3280 } 3281 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3282 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3283 PetscFunctionReturn(0); 3284 } 3285 static PetscEvent logkey_getlocalmat = 0; 3286 #undef __FUNCT__ 3287 #define __FUNCT__ "MatGetLocalMat" 3288 /*@C 3289 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3290 3291 Not Collective 3292 3293 Input Parameters: 3294 + A - the matrix 3295 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3296 3297 Output Parameter: 3298 . 
A_loc - the local sequential matrix generated 3299 3300 Level: developer 3301 3302 @*/ 3303 PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3304 { 3305 PetscErrorCode ierr; 3306 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3307 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3308 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3309 PetscScalar *aa=a->a,*ba=b->a,*ca; 3310 PetscInt am=A->m,i,j,k,cstart=mpimat->cstart; 3311 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3312 3313 PetscFunctionBegin; 3314 if (!logkey_getlocalmat) { 3315 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3316 } 3317 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3318 if (scall == MAT_INITIAL_MATRIX){ 3319 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 3320 ci[0] = 0; 3321 for (i=0; i<am; i++){ 3322 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3323 } 3324 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3325 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 3326 k = 0; 3327 for (i=0; i<am; i++) { 3328 ncols_o = bi[i+1] - bi[i]; 3329 ncols_d = ai[i+1] - ai[i]; 3330 /* off-diagonal portion of A */ 3331 for (jo=0; jo<ncols_o; jo++) { 3332 col = cmap[*bj]; 3333 if (col >= cstart) break; 3334 cj[k] = col; bj++; 3335 ca[k++] = *ba++; 3336 } 3337 /* diagonal portion of A */ 3338 for (j=0; j<ncols_d; j++) { 3339 cj[k] = cstart + *aj++; 3340 ca[k++] = *aa++; 3341 } 3342 /* off-diagonal portion of A */ 3343 for (j=jo; j<ncols_o; j++) { 3344 cj[k] = cmap[*bj++]; 3345 ca[k++] = *ba++; 3346 } 3347 } 3348 /* put together the new matrix */ 3349 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 3350 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3351 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3352 mat = (Mat_SeqAIJ*)(*A_loc)->data; 3353 mat->freedata = PETSC_TRUE; 3354 mat->nonew = 0; 3355 } else if (scall == MAT_REUSE_MATRIX){ 3356 mat=(Mat_SeqAIJ*)(*A_loc)->data; 3357 ci = mat->i; cj = mat->j; ca = mat->a; 3358 for (i=0; i<am; i++) { 3359 /* off-diagonal portion of A */ 3360 ncols_o = bi[i+1] - bi[i]; 3361 for (jo=0; jo<ncols_o; jo++) { 3362 col = cmap[*bj]; 3363 if (col >= cstart) break; 3364 *ca++ = *ba++; bj++; 3365 } 3366 /* diagonal portion of A */ 3367 ncols_d = ai[i+1] - ai[i]; 3368 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 3369 /* off-diagonal portion of A */ 3370 for (j=jo; j<ncols_o; j++) { 3371 *ca++ = *ba++; bj++; 3372 } 3373 } 3374 } else { 3375 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3376 } 3377 3378 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3379 PetscFunctionReturn(0); 3380 } 3381 3382 static PetscEvent logkey_getlocalmatcondensed = 0; 3383 #undef __FUNCT__ 3384 #define __FUNCT__ "MatGetLocalMatCondensed" 3385 /*@C 3386 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 3387 3388 Not Collective 3389 3390 Input Parameters: 3391 + A - the matrix 3392 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3393 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 3394 3395 Output Parameter: 3396 . 
A_loc - the local sequential matrix generated 3397 3398 Level: developer 3399 3400 @*/ 3401 PetscErrorCode MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 3402 { 3403 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3404 PetscErrorCode ierr; 3405 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 3406 IS isrowa,iscola; 3407 Mat *aloc; 3408 3409 PetscFunctionBegin; 3410 if (!logkey_getlocalmatcondensed) { 3411 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE); 3412 } 3413 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3414 if (!row){ 3415 start = a->rstart; end = a->rend; 3416 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 3417 } else { 3418 isrowa = *row; 3419 } 3420 if (!col){ 3421 start = a->cstart; 3422 cmap = a->garray; 3423 nzA = a->A->n; 3424 nzB = a->B->n; 3425 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3426 ncols = 0; 3427 for (i=0; i<nzB; i++) { 3428 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3429 else break; 3430 } 3431 imark = i; 3432 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 3433 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 3434 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 3435 ierr = PetscFree(idx);CHKERRQ(ierr); 3436 } else { 3437 iscola = *col; 3438 } 3439 if (scall != MAT_INITIAL_MATRIX){ 3440 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 3441 aloc[0] = *A_loc; 3442 } 3443 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 3444 *A_loc = aloc[0]; 3445 ierr = PetscFree(aloc);CHKERRQ(ierr); 3446 if (!row){ 3447 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 3448 } 3449 if (!col){ 3450 ierr = ISDestroy(iscola);CHKERRQ(ierr); 3451 } 3452 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3453 PetscFunctionReturn(0); 3454 } 3455 3456 static PetscEvent logkey_GetBrowsOfAcols = 0; 3457 #undef __FUNCT__ 3458 #define __FUNCT__ "MatGetBrowsOfAcols" 3459 /*@C 3460 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 3461 3462 Collective on Mat 3463 3464 Input Parameters: 3465 + A,B - the matrices in mpiaij format 3466 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3467 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 3468 3469 Output Parameter: 3470 + rowb, colb - index sets of rows and columns of B to extract 3471 . 
brstart - row index of B_seq from which next B->m rows are taken from B's local rows 3472 - B_seq - the sequential matrix generated 3473 3474 Level: developer 3475 3476 @*/ 3477 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 3478 { 3479 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3480 PetscErrorCode ierr; 3481 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 3482 IS isrowb,iscolb; 3483 Mat *bseq; 3484 3485 PetscFunctionBegin; 3486 if (a->cstart != b->rstart || a->cend != b->rend){ 3487 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend); 3488 } 3489 if (!logkey_GetBrowsOfAcols) { 3490 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 3491 } 3492 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3493 3494 if (scall == MAT_INITIAL_MATRIX){ 3495 start = a->cstart; 3496 cmap = a->garray; 3497 nzA = a->A->n; 3498 nzB = a->B->n; 3499 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3500 ncols = 0; 3501 for (i=0; i<nzB; i++) { /* row < local row index */ 3502 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3503 else break; 3504 } 3505 imark = i; 3506 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 3507 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 3508 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 3509 ierr = PetscFree(idx);CHKERRQ(ierr); 3510 *brstart = imark; 3511 ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr); 3512 } else { 3513 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 3514 isrowb = *rowb; iscolb = *colb; 3515 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 3516 bseq[0] = *B_seq; 3517 } 3518 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 3519 *B_seq = bseq[0]; 3520 ierr = PetscFree(bseq);CHKERRQ(ierr); 3521 if (!rowb){ 3522 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 3523 } else { 3524 *rowb = isrowb; 3525 } 3526 if (!colb){ 3527 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 3528 } else { 3529 *colb = iscolb; 3530 } 3531 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3532 PetscFunctionReturn(0); 3533 } 3534 3535 static PetscEvent logkey_GetBrowsOfAocols = 0; 3536 #undef __FUNCT__ 3537 #define __FUNCT__ "MatGetBrowsOfAoCols" 3538 /*@C 3539 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 3540 of the OFF-DIAGONAL portion of local A 3541 3542 Collective on Mat 3543 3544 Input Parameters: 3545 + A,B - the matrices in mpiaij format 3546 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3547 . 
startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 3548 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 3549 3550 Output Parameter: 3551 + B_oth - the sequential matrix generated 3552 3553 Level: developer 3554 3555 @*/ 3556 PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 3557 { 3558 VecScatter_MPI_General *gen_to,*gen_from; 3559 PetscErrorCode ierr; 3560 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3561 Mat_SeqAIJ *b_oth; 3562 VecScatter ctx=a->Mvctx; 3563 MPI_Comm comm=ctx->comm; 3564 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank; 3565 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj; 3566 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 3567 PetscInt i,k,l,nrecvs,nsends,nrows,*rrow,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 3568 MPI_Request *rwaits,*swaits; 3569 MPI_Status *sstatus,rstatus; 3570 PetscInt *cols; 3571 PetscScalar *vals; 3572 PetscMPIInt j; 3573 3574 PetscFunctionBegin; 3575 if (a->cstart != b->rstart || a->cend != b->rend){ 3576 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend); 3577 } 3578 if (!logkey_GetBrowsOfAocols) { 3579 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE); 3580 } 3581 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3582 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3583 3584 gen_to = (VecScatter_MPI_General*)ctx->todata; 3585 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 3586 rvalues = gen_from->values; /* holds the length of sending row */ 3587 svalues = gen_to->values; /* holds the length of receiving row */ 3588 nrecvs = gen_from->n; 3589 nsends = gen_to->n; 3590 rwaits = gen_from->requests; 3591 swaits = gen_to->requests; 3592 rrow = gen_from->indices; /* local row index to be received */ 3593 srow = gen_to->indices; /* local row index to be sent */ 3594 rstarts = gen_from->starts; 3595 sstarts = gen_to->starts; 3596 rprocs = gen_from->procs; 3597 sprocs = gen_to->procs; 3598 sstatus = gen_to->sstatus; 3599 3600 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 3601 if (scall == MAT_INITIAL_MATRIX){ 3602 /* i-array */ 3603 /*---------*/ 3604 /* post receives */ 3605 for (i=0; i<nrecvs; i++){ 3606 rowlen = (PetscInt*)rvalues + rstarts[i]; 3607 nrows = rstarts[i+1]-rstarts[i]; 3608 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3609 } 3610 3611 /* pack the outgoing message */ 3612 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 3613 rstartsj = sstartsj + nsends +1; 3614 sstartsj[0] = 0; rstartsj[0] = 0; 3615 len = 0; /* total length of j or a array to be sent */ 3616 k = 0; 3617 for (i=0; i<nsends; i++){ 3618 rowlen = (PetscInt*)svalues + sstarts[i]; 3619 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3620 for (j=0; j<nrows; j++) { 3621 row = srow[k] + b->rowners[rank]; /* global row idx */ 3622 ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 3623 len += rowlen[j]; 3624 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 3625 k++; 3626 } 3627 ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3628 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 3629 } 3630 /* recvs and sends of i-array are completed */ 
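/* Note: each outstanding receive was posted into its own precomputed slot of the buffer, so the MPI_Waitany loop below may complete the receives in any order; MPI_Waitall then completes the sends. The same wait pattern is reused below for the j-array and a-array exchanges. */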
3631 i = nrecvs; 3632 while (i--) { 3633 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3634 } 3635 if (nsends) { 3636 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr); 3637 } 3638 /* allocate buffers for sending j and a arrays */ 3639 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 3640 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 3641 3642 /* create i-array of B_oth */ 3643 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 3644 b_othi[0] = 0; 3645 len = 0; /* total length of j or a array to be received */ 3646 k = 0; 3647 for (i=0; i<nrecvs; i++){ 3648 rowlen = (PetscInt*)rvalues + rstarts[i]; 3649 nrows = rstarts[i+1]-rstarts[i]; 3650 for (j=0; j<nrows; j++) { 3651 b_othi[k+1] = b_othi[k] + rowlen[j]; 3652 len += rowlen[j]; k++; 3653 } 3654 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 3655 } 3656 3657 /* allocate space for j and a arrays of B_oth */ 3658 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 3659 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr); 3660 3661 /* j-array */ 3662 /*---------*/ 3663 /* post receives of j-array */ 3664 for (i=0; i<nrecvs; i++){ 3665 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3666 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3667 } 3668 k = 0; 3669 for (i=0; i<nsends; i++){ 3670 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3671 bufJ = bufj+sstartsj[i]; 3672 for (j=0; j<nrows; j++) { 3673 row = srow[k++] + b->rowners[rank]; /* global row idx */ 3674 ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3675 for (l=0; l<ncols; l++){ 3676 *bufJ++ = cols[l]; 3677 } 3678 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3679 } 3680 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3681 } 3682 3683 /* recvs and sends of j-array are completed */ 3684 i = nrecvs; 3685 while (i--) { 3686 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3687 } 3688 if (nsends) { 3689 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr); 3690 } 3691 } else if (scall == MAT_REUSE_MATRIX){ 3692 sstartsj = *startsj; 3693 rstartsj = sstartsj + nsends +1; 3694 bufa = *bufa_ptr; 3695 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 3696 b_otha = b_oth->a; 3697 } else { 3698 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3699 } 3700 3701 /* a-array */ 3702 /*---------*/ 3703 /* post receives of a-array */ 3704 for (i=0; i<nrecvs; i++){ 3705 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3706 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3707 } 3708 k = 0; 3709 for (i=0; i<nsends; i++){ 3710 nrows = sstarts[i+1]-sstarts[i]; 3711 bufA = bufa+sstartsj[i]; 3712 for (j=0; j<nrows; j++) { 3713 row = srow[k++] + b->rowners[rank]; /* global row idx */ 3714 ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3715 for (l=0; l<ncols; l++){ 3716 *bufA++ = vals[l]; 3717 } 3718 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3719 3720 } 3721 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3722 } 3723 /* recvs and sends of a-array are completed */ 3724 i = nrecvs; 3725 while (i--) { 3726 ierr =
MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3727 } 3728 if (nsends) { 3729 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr); 3730 } 3731 3732 if (scall == MAT_INITIAL_MATRIX){ 3733 /* put together the new matrix */ 3734 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 3735 3736 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3737 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3738 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 3739 b_oth->freedata = PETSC_TRUE; 3740 b_oth->nonew = 0; 3741 3742 ierr = PetscFree(bufj);CHKERRQ(ierr); 3743 if (!startsj || !bufa_ptr){ 3744 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 3745 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 3746 } else { 3747 *startsj = sstartsj; 3748 *bufa_ptr = bufa; 3749 } 3750 } 3751 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3752 3753 PetscFunctionReturn(0); 3754 } 3755 3756 /*MC 3757 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 3758 3759 Options Database Keys: 3760 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 3761 3762 Level: beginner 3763 3764 .seealso: MatCreateMPIAIJ 3765 M*/ 3766 3767 EXTERN_C_BEGIN 3768 #undef __FUNCT__ 3769 #define __FUNCT__ "MatCreate_MPIAIJ" 3770 PetscErrorCode MatCreate_MPIAIJ(Mat B) 3771 { 3772 Mat_MPIAIJ *b; 3773 PetscErrorCode ierr; 3774 PetscInt i; 3775 PetscMPIInt size; 3776 3777 PetscFunctionBegin; 3778 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 3779 3780 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 3781 B->data = (void*)b; 3782 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3783 B->factor = 0; 3784 B->bs = 1; 3785 B->assembled = PETSC_FALSE; 3786 B->mapping = 0; 3787 3788 B->insertmode = NOT_SET_VALUES; 3789 b->size = size; 3790 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 3791 3792 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 3793 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 3794 3795 /* the information in the maps duplicates the information computed below, eventually 3796 we should remove the duplicate information that is not contained in the maps */ 3797 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 3798 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 3799 3800 /* build local table of row and column ownerships */ 3801 ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 3802 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 3803 b->cowners = b->rowners + b->size + 2; 3804 ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3805 b->rowners[0] = 0; 3806 for (i=2; i<=b->size; i++) { 3807 b->rowners[i] += b->rowners[i-1]; 3808 } 3809 b->rstart = b->rowners[b->rank]; 3810 b->rend = b->rowners[b->rank+1]; 3811 ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3812 b->cowners[0] = 0; 3813 for (i=2; i<=b->size; i++) { 3814 b->cowners[i] += b->cowners[i-1]; 3815 } 3816 b->cstart = b->cowners[b->rank]; 3817 b->cend = b->cowners[b->rank+1]; 3818 3819 /* build cache for off array entries formed */ 3820 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 3821 b->donotstash = PETSC_FALSE; 3822 b->colmap = 0; 3823 b->garray = 0; 3824 b->roworiented = PETSC_TRUE; 3825 3826 /* stuff used for 
matrix vector multiply */ 3827 b->lvec = PETSC_NULL; 3828 b->Mvctx = PETSC_NULL; 3829 3830 /* stuff for MatGetRow() */ 3831 b->rowindices = 0; 3832 b->rowvalues = 0; 3833 b->getrowactive = PETSC_FALSE; 3834 3835 /* Explicitly create 2 MATSEQAIJ matrices. */ 3836 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 3837 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3838 PetscLogObjectParent(B,b->A); 3839 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 3840 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3841 PetscLogObjectParent(B,b->B); 3842 3843 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3844 "MatStoreValues_MPIAIJ", 3845 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 3846 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3847 "MatRetrieveValues_MPIAIJ", 3848 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 3849 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3850 "MatGetDiagonalBlock_MPIAIJ", 3851 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 3852 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3853 "MatIsTranspose_MPIAIJ", 3854 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 3855 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 3856 "MatMPIAIJSetPreallocation_MPIAIJ", 3857 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 3858 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 3859 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 3860 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 3861 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3862 "MatDiagonalScaleLocal_MPIAIJ", 3863 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 3864 PetscFunctionReturn(0); 3865 } 3866 EXTERN_C_END 3867
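/*
   A minimal usage sketch of MatMerge_SeqsToMPI(), assuming each process already holds a SeqAIJ
   matrix seqmat whose column dimension is the global column count: a call with MAT_INITIAL_MATRIX
   runs the symbolic phase (MatMerge_SeqsToMPISymbolic) and attaches the Mat_Merge_SeqsToMPI
   support structure to the resulting MPIAIJ matrix, while a later call with MAT_REUSE_MATRIX
   reuses that structure and performs only the numeric phase. The helper name MergeRowsExample is
   hypothetical and for illustration only; the block is excluded from compilation.
*/
#if 0
static PetscErrorCode MergeRowsExample(MPI_Comm comm,Mat seqmat,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* symbolic + numeric merge; let PETSc choose the row and column distribution */
  ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
  /* after the numerical values of seqmat change (same nonzero pattern), redo only the numeric phase */
  ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif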