#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/inline/spops.h"

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is defined this mapping is
  scalable at a slightly higher hash-table cost; without it, it is not
  scalable (each processor has an order-N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
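
/*
  Added commentary (not part of the original source): both colmap variants store
  "local index + 1" so that a lookup result of 0 can signal "column not present
  in the off-diagonal block".  Under that convention, a caller mapping a
  hypothetical global column gcol to a local off-diagonal column lcol does
  roughly

#if defined (PETSC_USE_CTABLE)
      ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
      lcol--;
#else
      lcol = aij->colmap[gcol] - 1;
#endif

  which is the pattern used by MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ()
  below; gcol and lcol are illustrative names only.
*/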
#define CHUNKSIZE 15

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      PetscInt    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(am+1)*sizeof(PetscInt); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (PetscInt*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                         len*sizeof(PetscInt));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar)));CHKERRQ(ierr); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      PetscInt    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(bm+1)*sizeof(PetscInt); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (PetscInt*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
      for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                         len*sizeof(PetscInt));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(b->a);CHKERRQ(ierr); \
      if (!b->singlemalloc) { \
        ierr = PetscFree(b->i);CHKERRQ(ierr); \
        ierr = PetscFree(b->j);CHKERRQ(ierr); \
      } \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = PETSC_TRUE; \
 \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      ierr = PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar)));CHKERRQ(ierr); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow; \
}
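
/*
  Added commentary: the two macros above are hand-inlined versions of the
  row-insertion logic of MatSetValues_SeqAIJ, specialized for the diagonal
  block (A, "a_" labels) and the off-diagonal block (B, "b_" labels).  A binary
  search narrows the candidate window to at most five entries, a linear scan
  locates the insertion point, and when a row is full the whole matrix storage
  is grown by CHUNKSIZE (15) extra index/value pairs in a single reallocation.
*/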
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
  PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A = aij->A;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)A->data;
  PetscInt    *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar *aa = a->a;
  PetscTruth  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat         B = aij->B;
  Mat_SeqAIJ  *b = (Mat_SeqAIJ*)B->data;
  PetscInt    *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
  PetscScalar *ba = b->a;

  PetscInt    *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
  PetscInt    nonew = a->nonew,shift=0;
  PetscScalar *ap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
#endif
        else {
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba    = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
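
/*
  Added commentary: each (im[i],in[j]) pair above falls into one of three cases.
  A locally owned row with a diagonal-block column goes through
  MatSetValues_SeqAIJ_A_Private; a locally owned row with an off-diagonal column
  is translated through colmap (possibly after DisAssemble_MPIAIJ) and goes
  through MatSetValues_SeqAIJ_B_Private; a row owned by another process is
  stashed and shipped during MatAssemblyBegin/End.
*/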
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
  PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}
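
/*
  Usage sketch (added; row, col and val are hypothetical): values may be set
  for any global row, including rows owned by other processes, as long as
  every process finishes with the collective assembly pair:

      ierr = MatSetValues(mat,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  MatAssemblyBegin_MPIAIJ() above ships the stashed off-process entries;
  MatAssemblyEnd_MPIAIJ() below receives them and applies them through
  MatSetValues_MPIAIJ().
*/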
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ *)aij->A->data,*b = (Mat_SeqAIJ *)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col,other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
      /* reaccess b because aij->B was changed */
      b = (Mat_SeqAIJ *)aij->B->data;
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }

  b->inode.use         = PETSC_FALSE;
  b->compressedrow.use = PETSC_TRUE;
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }

  /* used by MatAXPY() */
  a->xtoy = 0; b->xtoy = 0;
  a->XtoY = 0; b->XtoY = 0;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
  PetscInt       i,N,*rows,*owners = l->rowners;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(A,istmp);CHKERRQ(ierr);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory repeatedly, causing many mallocs and
     slowing things down.

     Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
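
/*
  Added commentary: the two routines above overlap communication with
  computation.  Writing the locally stored part of the global matrix as
  [A_diag  B_off], y = M x is formed as

      y  = A_diag * x_local     (while ghost values of x are still in flight)
      y += B_off  * x_ghost     (after VecScatterEnd has filled a->lvec)

  so the diagonal-block multiply hides the latency of the forward scatter.
*/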
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added into yy until the next line */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
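
/*
  Added commentary: the transpose products reverse the communication pattern of
  MatMult_MPIAIJ().  The contribution B_off^T x_local is scattered back to the
  owning processes (SCATTER_REVERSE with ADD_VALUES) while A_diag^T x_local is
  computed locally, so the result of M^T x is accumulated without ever forming
  the transpose explicitly.
*/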

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->M;
    header[2] = mat->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen = mat->m;
    ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo    info;
      Mat_SeqAIJ *a = (Mat_SeqAIJ*) aij->A->data;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      if (!a->inode.size) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
               rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
               rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      Mat_SeqAIJ *a = (Mat_SeqAIJ*) aij->A->data;
      if (a->inode.size) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",a->inode.node_count,a->inode.limit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    PetscInt    M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    if (!rank) {
      ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = aij->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}



#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1;
  PetscScalar    mone=-1.0;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  info->columns_local     = (double)matin->N;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = aij->cstart;
  PetscReal      sum = 0.0;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr  = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr  = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->m; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat            B;
  PetscScalar    *array;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  }

  ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*)a->A->data;
  m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*)a->B->data;
  m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row  = a->rstart;
  ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
  ct   = cols;
  for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  ierr = PetscFree(ct);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatPrintHelp_MPIAIJ"
PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetBlockSize_MPIAIJ"
PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
  ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIAIJ"
PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
*/ 1571 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1572 /* because of the column compression in the off-processor part of the matrix a->B, 1573 the number of columns in a->B and b->B may be different, hence we cannot call 1574 the MatCopy() directly on the two parts. If need be, we can provide a more 1575 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1576 then copying the submatrices */ 1577 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1578 } else { 1579 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1580 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1581 } 1582 PetscFunctionReturn(0); 1583 } 1584 1585 #undef __FUNCT__ 1586 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1587 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1588 { 1589 PetscErrorCode ierr; 1590 1591 PetscFunctionBegin; 1592 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1593 PetscFunctionReturn(0); 1594 } 1595 1596 #include "petscblaslapack.h" 1597 #undef __FUNCT__ 1598 #define __FUNCT__ "MatAXPY_MPIAIJ" 1599 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str) 1600 { 1601 PetscErrorCode ierr; 1602 PetscInt i; 1603 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1604 PetscBLASInt bnz,one=1; 1605 Mat_SeqAIJ *x,*y; 1606 1607 PetscFunctionBegin; 1608 if (str == SAME_NONZERO_PATTERN) { 1609 x = (Mat_SeqAIJ *)xx->A->data; 1610 y = (Mat_SeqAIJ *)yy->A->data; 1611 bnz = (PetscBLASInt)x->nz; 1612 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1613 x = (Mat_SeqAIJ *)xx->B->data; 1614 y = (Mat_SeqAIJ *)yy->B->data; 1615 bnz = (PetscBLASInt)x->nz; 1616 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1617 } else if (str == SUBSET_NONZERO_PATTERN) { 1618 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1619 1620 x = (Mat_SeqAIJ *)xx->B->data; 1621 y = (Mat_SeqAIJ *)yy->B->data; 1622 if (y->xtoy && y->XtoY != xx->B) { 1623 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1624 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1625 } 1626 if (!y->xtoy) { /* get xtoy */ 1627 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1628 y->XtoY = xx->B; 1629 } 1630 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1631 } else { 1632 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1633 } 1634 PetscFunctionReturn(0); 1635 } 1636 1637 /* -------------------------------------------------------------------*/ 1638 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1639 MatGetRow_MPIAIJ, 1640 MatRestoreRow_MPIAIJ, 1641 MatMult_MPIAIJ, 1642 /* 4*/ MatMultAdd_MPIAIJ, 1643 MatMultTranspose_MPIAIJ, 1644 MatMultTransposeAdd_MPIAIJ, 1645 0, 1646 0, 1647 0, 1648 /*10*/ 0, 1649 0, 1650 0, 1651 MatRelax_MPIAIJ, 1652 MatTranspose_MPIAIJ, 1653 /*15*/ MatGetInfo_MPIAIJ, 1654 MatEqual_MPIAIJ, 1655 MatGetDiagonal_MPIAIJ, 1656 MatDiagonalScale_MPIAIJ, 1657 MatNorm_MPIAIJ, 1658 /*20*/ MatAssemblyBegin_MPIAIJ, 1659 MatAssemblyEnd_MPIAIJ, 1660 0, 1661 MatSetOption_MPIAIJ, 1662 MatZeroEntries_MPIAIJ, 1663 /*25*/ MatZeroRows_MPIAIJ, 1664 0, 1665 0, 1666 0, 1667 0, 1668 /*30*/ MatSetUpPreallocation_MPIAIJ, 1669 0, 1670 0, 1671 0, 1672 0, 1673 /*35*/ MatDuplicate_MPIAIJ, 1674 0, 1675 0, 1676 0, 1677 0, 1678 /*40*/ MatAXPY_MPIAIJ, 1679 MatGetSubMatrices_MPIAIJ, 1680 MatIncreaseOverlap_MPIAIJ, 1681 MatGetValues_MPIAIJ, 1682 MatCopy_MPIAIJ, 1683 /*45*/ MatPrintHelp_MPIAIJ, 1684 MatScale_MPIAIJ, 1685 0, 1686 0, 1687 0, 1688 /*50*/ MatSetBlockSize_MPIAIJ, 1689 
0, 1690 0, 1691 0, 1692 0, 1693 /*55*/ MatFDColoringCreate_MPIAIJ, 1694 0, 1695 MatSetUnfactored_MPIAIJ, 1696 0, 1697 0, 1698 /*60*/ MatGetSubMatrix_MPIAIJ, 1699 MatDestroy_MPIAIJ, 1700 MatView_MPIAIJ, 1701 MatGetPetscMaps_Petsc, 1702 0, 1703 /*65*/ 0, 1704 0, 1705 0, 1706 0, 1707 0, 1708 /*70*/ 0, 1709 0, 1710 MatSetColoring_MPIAIJ, 1711 #if defined(PETSC_HAVE_ADIC) 1712 MatSetValuesAdic_MPIAIJ, 1713 #else 1714 0, 1715 #endif 1716 MatSetValuesAdifor_MPIAIJ, 1717 /*75*/ 0, 1718 0, 1719 0, 1720 0, 1721 0, 1722 /*80*/ 0, 1723 0, 1724 0, 1725 0, 1726 /*84*/ MatLoad_MPIAIJ, 1727 0, 1728 0, 1729 0, 1730 0, 1731 0, 1732 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1733 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1734 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1735 MatPtAP_Basic, 1736 MatPtAPSymbolic_MPIAIJ, 1737 /*95*/ MatPtAPNumeric_MPIAIJ, 1738 0, 1739 0, 1740 0, 1741 0, 1742 /*100*/0, 1743 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1744 MatPtAPNumeric_MPIAIJ_MPIAIJ, 1745 }; 1746 1747 /* ----------------------------------------------------------------------------------------*/ 1748 1749 EXTERN_C_BEGIN 1750 #undef __FUNCT__ 1751 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1752 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 1753 { 1754 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1755 PetscErrorCode ierr; 1756 1757 PetscFunctionBegin; 1758 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1759 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1760 PetscFunctionReturn(0); 1761 } 1762 EXTERN_C_END 1763 1764 EXTERN_C_BEGIN 1765 #undef __FUNCT__ 1766 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1767 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 1768 { 1769 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1770 PetscErrorCode ierr; 1771 1772 PetscFunctionBegin; 1773 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1774 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1775 PetscFunctionReturn(0); 1776 } 1777 EXTERN_C_END 1778 1779 #include "petscpc.h" 1780 EXTERN_C_BEGIN 1781 #undef __FUNCT__ 1782 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1783 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1784 { 1785 Mat_MPIAIJ *b; 1786 PetscErrorCode ierr; 1787 PetscInt i; 1788 1789 PetscFunctionBegin; 1790 B->preallocated = PETSC_TRUE; 1791 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1792 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1793 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1794 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1795 if (d_nnz) { 1796 for (i=0; i<B->m; i++) { 1797 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1798 } 1799 } 1800 if (o_nnz) { 1801 for (i=0; i<B->m; i++) { 1802 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1803 } 1804 } 1805 b = (Mat_MPIAIJ*)B->data; 1806 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1807 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1808 1809 PetscFunctionReturn(0); 1810 } 1811 EXTERN_C_END 1812 1813 #undef __FUNCT__ 1814 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1815 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1816 { 1817 Mat mat; 1818 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1819 PetscErrorCode ierr; 1820 1821 PetscFunctionBegin; 1822 *newmat = 0; 1823 ierr = 
MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1824 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1825 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1826 a = (Mat_MPIAIJ*)mat->data; 1827 1828 mat->factor = matin->factor; 1829 mat->bs = matin->bs; 1830 mat->assembled = PETSC_TRUE; 1831 mat->insertmode = NOT_SET_VALUES; 1832 mat->preallocated = PETSC_TRUE; 1833 1834 a->rstart = oldmat->rstart; 1835 a->rend = oldmat->rend; 1836 a->cstart = oldmat->cstart; 1837 a->cend = oldmat->cend; 1838 a->size = oldmat->size; 1839 a->rank = oldmat->rank; 1840 a->donotstash = oldmat->donotstash; 1841 a->roworiented = oldmat->roworiented; 1842 a->rowindices = 0; 1843 a->rowvalues = 0; 1844 a->getrowactive = PETSC_FALSE; 1845 1846 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 1847 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1848 if (oldmat->colmap) { 1849 #if defined (PETSC_USE_CTABLE) 1850 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1851 #else 1852 ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1853 ierr = PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1854 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1855 #endif 1856 } else a->colmap = 0; 1857 if (oldmat->garray) { 1858 PetscInt len; 1859 len = oldmat->B->n; 1860 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1861 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 1862 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 1863 } else a->garray = 0; 1864 1865 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1866 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 1867 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1868 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 1869 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1870 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1871 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 1872 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1873 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1874 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 1875 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1876 *newmat = mat; 1877 PetscFunctionReturn(0); 1878 } 1879 1880 #include "petscsys.h" 1881 1882 #undef __FUNCT__ 1883 #define __FUNCT__ "MatLoad_MPIAIJ" 1884 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 1885 { 1886 Mat A; 1887 PetscScalar *vals,*svals; 1888 MPI_Comm comm = ((PetscObject)viewer)->comm; 1889 MPI_Status status; 1890 PetscErrorCode ierr; 1891 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 1892 PetscInt i,nz,j,rstart,rend; 1893 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 1894 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1895 PetscInt cend,cstart,n,*rowners; 1896 int fd; 1897 1898 PetscFunctionBegin; 1899 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1900 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1901 if (!rank) { 1902 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1903 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1904 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1905 } 1906 1907 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1908 M = 
header[1]; N = header[2]; 1909 /* determine ownership of all rows */ 1910 m = M/size + ((M % size) > rank); 1911 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1912 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1913 rowners[0] = 0; 1914 for (i=2; i<=size; i++) { 1915 rowners[i] += rowners[i-1]; 1916 } 1917 rstart = rowners[rank]; 1918 rend = rowners[rank+1]; 1919 1920 /* distribute row lengths to all processors */ 1921 ierr = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr); 1922 if (!rank) { 1923 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 1924 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1925 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 1926 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1927 for (j=0; j<m; j++) { 1928 procsnz[0] += ourlens[j]; 1929 } 1930 for (i=1; i<size; i++) { 1931 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 1932 /* calculate the number of nonzeros on each processor */ 1933 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 1934 procsnz[i] += rowlengths[j]; 1935 } 1936 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1937 } 1938 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1939 } else { 1940 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1941 } 1942 1943 if (!rank) { 1944 /* determine max buffer needed and allocate it */ 1945 maxnz = 0; 1946 for (i=0; i<size; i++) { 1947 maxnz = PetscMax(maxnz,procsnz[i]); 1948 } 1949 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1950 1951 /* read in my part of the matrix column indices */ 1952 nz = procsnz[0]; 1953 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1954 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1955 1956 /* read in every one elses and ship off */ 1957 for (i=1; i<size; i++) { 1958 nz = procsnz[i]; 1959 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1960 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1961 } 1962 ierr = PetscFree(cols);CHKERRQ(ierr); 1963 } else { 1964 /* determine buffer space needed for message */ 1965 nz = 0; 1966 for (i=0; i<m; i++) { 1967 nz += ourlens[i]; 1968 } 1969 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1970 1971 /* receive message of column indices*/ 1972 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1973 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1974 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1975 } 1976 1977 /* determine column ownership if matrix is not square */ 1978 if (N != M) { 1979 n = N/size + ((N % size) > rank); 1980 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1981 cstart = cend - n; 1982 } else { 1983 cstart = rstart; 1984 cend = rend; 1985 n = cend - cstart; 1986 } 1987 1988 /* loop over local rows, determining number of off diagonal entries */ 1989 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 1990 jj = 0; 1991 for (i=0; i<m; i++) { 1992 for (j=0; j<ourlens[i]; j++) { 1993 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 1994 jj++; 1995 } 1996 } 1997 1998 /* create our matrix */ 1999 for (i=0; i<m; i++) { 2000 ourlens[i] -= offlens[i]; 2001 } 2002 ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr); 2003 ierr = MatSetType(A,type);CHKERRQ(ierr); 2004 ierr = 
MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2005 2006 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2007 for (i=0; i<m; i++) { 2008 ourlens[i] += offlens[i]; 2009 } 2010 2011 if (!rank) { 2012 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2013 2014 /* read in my part of the matrix numerical values */ 2015 nz = procsnz[0]; 2016 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2017 2018 /* insert into matrix */ 2019 jj = rstart; 2020 smycols = mycols; 2021 svals = vals; 2022 for (i=0; i<m; i++) { 2023 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2024 smycols += ourlens[i]; 2025 svals += ourlens[i]; 2026 jj++; 2027 } 2028 2029 /* read in other processors and ship out */ 2030 for (i=1; i<size; i++) { 2031 nz = procsnz[i]; 2032 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2033 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2034 } 2035 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2036 } else { 2037 /* receive numeric values */ 2038 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2039 2040 /* receive message of values*/ 2041 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2042 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2043 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2044 2045 /* insert into matrix */ 2046 jj = rstart; 2047 smycols = mycols; 2048 svals = vals; 2049 for (i=0; i<m; i++) { 2050 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2051 smycols += ourlens[i]; 2052 svals += ourlens[i]; 2053 jj++; 2054 } 2055 } 2056 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2057 ierr = PetscFree(vals);CHKERRQ(ierr); 2058 ierr = PetscFree(mycols);CHKERRQ(ierr); 2059 ierr = PetscFree(rowners);CHKERRQ(ierr); 2060 2061 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2062 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2063 *newmat = A; 2064 PetscFunctionReturn(0); 2065 } 2066 2067 #undef __FUNCT__ 2068 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2069 /* 2070 Not great since it makes two copies of the submatrix, first an SeqAIJ 2071 in local and then by concatenating the local matrices the end result. 
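     That is, MatGetSubMatrices() first builds the requested rows and columns as a
     sequential AIJ matrix on each process, and those local pieces are then inserted
     row by row into the final parallel matrix below.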
2072 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2073 */ 2074 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2075 { 2076 PetscErrorCode ierr; 2077 PetscMPIInt rank,size; 2078 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2079 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2080 Mat *local,M,Mreuse; 2081 PetscScalar *vwork,*aa; 2082 MPI_Comm comm = mat->comm; 2083 Mat_SeqAIJ *aij; 2084 2085 2086 PetscFunctionBegin; 2087 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2088 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2089 2090 if (call == MAT_REUSE_MATRIX) { 2091 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2092 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2093 local = &Mreuse; 2094 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2095 } else { 2096 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2097 Mreuse = *local; 2098 ierr = PetscFree(local);CHKERRQ(ierr); 2099 } 2100 2101 /* 2102 m - number of local rows 2103 n - number of columns (same on all processors) 2104 rstart - first row in new global matrix generated 2105 */ 2106 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2107 if (call == MAT_INITIAL_MATRIX) { 2108 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2109 ii = aij->i; 2110 jj = aij->j; 2111 2112 /* 2113 Determine the number of non-zeros in the diagonal and off-diagonal 2114 portions of the matrix in order to do correct preallocation 2115 */ 2116 2117 /* first get start and end of "diagonal" columns */ 2118 if (csize == PETSC_DECIDE) { 2119 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2120 if (mglobal == n) { /* square matrix */ 2121 nlocal = m; 2122 } else { 2123 nlocal = n/size + ((n % size) > rank); 2124 } 2125 } else { 2126 nlocal = csize; 2127 } 2128 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2129 rstart = rend - nlocal; 2130 if (rank == size - 1 && rend != n) { 2131 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2132 } 2133 2134 /* next, compute all the lengths */ 2135 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2136 olens = dlens + m; 2137 for (i=0; i<m; i++) { 2138 jend = ii[i+1] - ii[i]; 2139 olen = 0; 2140 dlen = 0; 2141 for (j=0; j<jend; j++) { 2142 if (*jj < rstart || *jj >= rend) olen++; 2143 else dlen++; 2144 jj++; 2145 } 2146 olens[i] = olen; 2147 dlens[i] = dlen; 2148 } 2149 ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr); 2150 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2151 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2152 ierr = PetscFree(dlens);CHKERRQ(ierr); 2153 } else { 2154 PetscInt ml,nl; 2155 2156 M = *newmat; 2157 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2158 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2159 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2160 /* 2161 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2162 rather than the slower MatSetValues(). 
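       (Presumably these two flag settings mimic what MatSetValues() itself does when
        values are inserted into an already assembled matrix, so that global column
        indices destined for the off-diagonal block are translated through the
        existing colmap.)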
2163 */ 2164 M->was_assembled = PETSC_TRUE; 2165 M->assembled = PETSC_FALSE; 2166 } 2167 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2168 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2169 ii = aij->i; 2170 jj = aij->j; 2171 aa = aij->a; 2172 for (i=0; i<m; i++) { 2173 row = rstart + i; 2174 nz = ii[i+1] - ii[i]; 2175 cwork = jj; jj += nz; 2176 vwork = aa; aa += nz; 2177 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2178 } 2179 2180 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2181 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2182 *newmat = M; 2183 2184 /* save submatrix used in processor for next request */ 2185 if (call == MAT_INITIAL_MATRIX) { 2186 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2187 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2188 } 2189 2190 PetscFunctionReturn(0); 2191 } 2192 2193 EXTERN_C_BEGIN 2194 #undef __FUNCT__ 2195 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2196 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2197 { 2198 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2199 PetscInt m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d; 2200 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii; 2201 const PetscInt *JJ; 2202 PetscScalar *values; 2203 PetscErrorCode ierr; 2204 2205 PetscFunctionBegin; 2206 #if defined(PETSC_OPT_g) 2207 if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]); 2208 #endif 2209 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2210 o_nnz = d_nnz + m; 2211 2212 for (i=0; i<m; i++) { 2213 nnz = I[i+1]- I[i]; 2214 JJ = J + I[i]; 2215 nnz_max = PetscMax(nnz_max,nnz); 2216 #if defined(PETSC_OPT_g) 2217 if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 2218 #endif 2219 for (j=0; j<nnz; j++) { 2220 if (*JJ >= cstart) break; 2221 JJ++; 2222 } 2223 d = 0; 2224 for (; j<nnz; j++) { 2225 if (*JJ++ >= cend) break; 2226 d++; 2227 } 2228 d_nnz[i] = d; 2229 o_nnz[i] = nnz - d; 2230 } 2231 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2232 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2233 2234 if (v) values = (PetscScalar*)v; 2235 else { 2236 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2237 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2238 } 2239 2240 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2241 for (i=0; i<m; i++) { 2242 ii = i + rstart; 2243 nnz = I[i+1]- I[i]; 2244 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr); 2245 } 2246 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2247 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2248 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2249 2250 if (!v) { 2251 ierr = PetscFree(values);CHKERRQ(ierr); 2252 } 2253 PetscFunctionReturn(0); 2254 } 2255 EXTERN_C_END 2256 2257 #undef __FUNCT__ 2258 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2259 /*@C 2260 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2261 (the default parallel PETSc format). 2262 2263 Collective on MPI_Comm 2264 2265 Input Parameters: 2266 + A - the matrix 2267 . i - the indices into j for the start of each local row (starts with zero) 2268 . 
j - the column indices for each local row (starts with zero) these must be sorted for each row 2269 - v - optional values in the matrix 2270 2271 Level: developer 2272 2273 .keywords: matrix, aij, compressed row, sparse, parallel 2274 2275 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2276 @*/ 2277 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2278 { 2279 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2280 2281 PetscFunctionBegin; 2282 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2283 if (f) { 2284 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2285 } 2286 PetscFunctionReturn(0); 2287 } 2288 2289 #undef __FUNCT__ 2290 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2291 /*@C 2292 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2293 (the default parallel PETSc format). For good matrix assembly performance 2294 the user should preallocate the matrix storage by setting the parameters 2295 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2296 performance can be increased by more than a factor of 50. 2297 2298 Collective on MPI_Comm 2299 2300 Input Parameters: 2301 + A - the matrix 2302 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2303 (same value is used for all local rows) 2304 . d_nnz - array containing the number of nonzeros in the various rows of the 2305 DIAGONAL portion of the local submatrix (possibly different for each row) 2306 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2307 The size of this array is equal to the number of local rows, i.e 'm'. 2308 You must leave room for the diagonal entry even if it is zero. 2309 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2310 submatrix (same value is used for all local rows). 2311 - o_nnz - array containing the number of nonzeros in the various rows of the 2312 OFF-DIAGONAL portion of the local submatrix (possibly different for 2313 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2314 structure. The size of this array is equal to the number 2315 of local rows, i.e 'm'. 2316 2317 If the *_nnz parameter is given then the *_nz parameter is ignored 2318 2319 The AIJ format (also called the Yale sparse matrix format or 2320 compressed row storage (CSR)), is fully compatible with standard Fortran 77 2321 storage. The stored row and column indices begin with zero. See the users manual for details. 2322 2323 The parallel matrix is partitioned such that the first m0 rows belong to 2324 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2325 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2326 2327 The DIAGONAL portion of the local submatrix of a processor can be defined 2328 as the submatrix which is obtained by extraction the part corresponding 2329 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2330 first row that belongs to the processor, and r2 is the last row belonging 2331 to the this processor. This is a square mxm matrix. The remaining portion 2332 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2333 2334 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 
2335 2336 Example usage: 2337 2338 Consider the following 8x8 matrix with 34 non-zero values, that is 2339 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2340 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 2341 as follows: 2342 2343 .vb 2344 1 2 0 | 0 3 0 | 0 4 2345 Proc0 0 5 6 | 7 0 0 | 8 0 2346 9 0 10 | 11 0 0 | 12 0 2347 ------------------------------------- 2348 13 0 14 | 15 16 17 | 0 0 2349 Proc1 0 18 0 | 19 20 21 | 0 0 2350 0 0 0 | 22 23 0 | 24 0 2351 ------------------------------------- 2352 Proc2 25 26 27 | 0 0 28 | 29 0 2353 30 0 0 | 31 32 33 | 0 34 2354 .ve 2355 2356 This can be represented as a collection of submatrices as: 2357 2358 .vb 2359 A B C 2360 D E F 2361 G H I 2362 .ve 2363 2364 where the submatrices A,B,C are owned by proc0, D,E,F are 2365 owned by proc1, and G,H,I are owned by proc2. 2366 2367 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2368 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2369 The 'M','N' parameters are 8,8, and have the same values on all procs. 2370 2371 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2372 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2373 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2374 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 2375 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 2376 matrix, and [DF] as another SeqAIJ matrix. 2377 2378 When the d_nz, o_nz parameters are specified, d_nz storage elements are 2379 allocated for every row of the local DIAGONAL submatrix, and o_nz 2380 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2381 One way to choose d_nz and o_nz is to use the maximum number of nonzeros over 2382 the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively. 2383 In this case, the values of d_nz,o_nz are: 2384 .vb 2385 proc0 : d_nz = 2, o_nz = 2 2386 proc1 : d_nz = 3, o_nz = 2 2387 proc2 : d_nz = 1, o_nz = 4 2388 .ve 2389 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2390 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 2391 for proc2, i.e. we are using 12+15+10=37 storage locations to store 2392 34 values. 2393 2394 When the d_nnz, o_nnz parameters are specified, the storage is specified 2395 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 2396 In the above case the values for d_nnz,o_nnz are: 2397 .vb 2398 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2399 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2400 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2401 .ve 2402 Here the space allocated is the sum of all the above values, i.e. 34, and 2403 hence the preallocation is exact.
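
   A typical call sequence is sketched below; the per-row estimates 5 and 2 are
   illustrative only, and the d_nnz/o_nnz arrays discussed above may be passed
   instead of PETSC_NULL:
.vb
      MatCreate(comm,m,n,M,N,&A);
      MatSetType(A,MATMPIAIJ);
      MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);
.ve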
2404 2405 Level: intermediate 2406 2407 .keywords: matrix, aij, compressed row, sparse, parallel 2408 2409 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 2410 MPIAIJ 2411 @*/ 2412 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2413 { 2414 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2415 2416 PetscFunctionBegin; 2417 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2418 if (f) { 2419 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2420 } 2421 PetscFunctionReturn(0); 2422 } 2423 2424 #undef __FUNCT__ 2425 #define __FUNCT__ "MatCreateMPIAIJ" 2426 /*@C 2427 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2428 (the default parallel PETSc format). For good matrix assembly performance 2429 the user should preallocate the matrix storage by setting the parameters 2430 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2431 performance can be increased by more than a factor of 50. 2432 2433 Collective on MPI_Comm 2434 2435 Input Parameters: 2436 + comm - MPI communicator 2437 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2438 This value should be the same as the local size used in creating the 2439 y vector for the matrix-vector product y = Ax. 2440 . n - This value should be the same as the local size used in creating the 2441 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2442 calculated if N is given) For square matrices n is almost always m. 2443 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2444 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2445 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2446 (same value is used for all local rows) 2447 . d_nnz - array containing the number of nonzeros in the various rows of the 2448 DIAGONAL portion of the local submatrix (possibly different for each row) 2449 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2450 The size of this array is equal to the number of local rows, i.e 'm'. 2451 You must leave room for the diagonal entry even if it is zero. 2452 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2453 submatrix (same value is used for all local rows). 2454 - o_nnz - array containing the number of nonzeros in the various rows of the 2455 OFF-DIAGONAL portion of the local submatrix (possibly different for 2456 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2457 structure. The size of this array is equal to the number 2458 of local rows, i.e 'm'. 2459 2460 Output Parameter: 2461 . A - the matrix 2462 2463 Notes: 2464 If the *_nnz parameter is given then the *_nz parameter is ignored 2465 2466 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2467 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2468 storage requirements for this matrix. 2469 2470 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2471 processor than it must be used on all processors that share the object for 2472 that argument. 2473 2474 The user MUST specify either the local or global matrix dimensions 2475 (possibly both). 
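
   For example, the following sketch creates a parallel AIJ matrix with an
   estimate of 5 nonzeros per row in the DIAGONAL portion and 2 per row in the
   OFF-DIAGONAL portion (the counts are illustrative only):
.vb
      MatCreateMPIAIJ(comm,m,n,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);
.ve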
2476 2477 The parallel matrix is partitioned such that the first m0 rows belong to 2478 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2479 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2480 2481 The DIAGONAL portion of the local submatrix of a processor can be defined 2482 as the submatrix which is obtained by extraction the part corresponding 2483 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2484 first row that belongs to the processor, and r2 is the last row belonging 2485 to the this processor. This is a square mxm matrix. The remaining portion 2486 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2487 2488 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2489 2490 When calling this routine with a single process communicator, a matrix of 2491 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2492 type of communicator, use the construction mechanism: 2493 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 2494 2495 By default, this format uses inodes (identical nodes) when possible. 2496 We search for consecutive rows with the same nonzero structure, thereby 2497 reusing matrix information to achieve increased efficiency. 2498 2499 Options Database Keys: 2500 + -mat_aij_no_inode - Do not use inodes 2501 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2502 - -mat_aij_oneindex - Internally use indexing starting at 1 2503 rather than 0. Note that when calling MatSetValues(), 2504 the user still MUST index entries starting at 0! 2505 2506 2507 Example usage: 2508 2509 Consider the following 8x8 matrix with 34 non-zero values, that is 2510 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2511 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2512 as follows: 2513 2514 .vb 2515 1 2 0 | 0 3 0 | 0 4 2516 Proc0 0 5 6 | 7 0 0 | 8 0 2517 9 0 10 | 11 0 0 | 12 0 2518 ------------------------------------- 2519 13 0 14 | 15 16 17 | 0 0 2520 Proc1 0 18 0 | 19 20 21 | 0 0 2521 0 0 0 | 22 23 0 | 24 0 2522 ------------------------------------- 2523 Proc2 25 26 27 | 0 0 28 | 29 0 2524 30 0 0 | 31 32 33 | 0 34 2525 .ve 2526 2527 This can be represented as a collection of submatrices as: 2528 2529 .vb 2530 A B C 2531 D E F 2532 G H I 2533 .ve 2534 2535 Where the submatrices A,B,C are owned by proc0, D,E,F are 2536 owned by proc1, G,H,I are owned by proc2. 2537 2538 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2539 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2540 The 'M','N' parameters are 8,8, and have the same values on all procs. 2541 2542 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2543 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2544 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2545 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2546 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2547 matrix, ans [DF] as another SeqAIJ matrix. 2548 2549 When d_nz, o_nz parameters are specified, d_nz storage elements are 2550 allocated for every row of the local diagonal submatrix, and o_nz 2551 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2552 One way to choose d_nz and o_nz is to use the max nonzerors per local 2553 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 
2554 In this case, the values of d_nz,o_nz are: 2555 .vb 2556 proc0 : dnz = 2, o_nz = 2 2557 proc1 : dnz = 3, o_nz = 2 2558 proc2 : dnz = 1, o_nz = 4 2559 .ve 2560 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2561 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2562 for proc3. i.e we are using 12+15+10=37 storage locations to store 2563 34 values. 2564 2565 When d_nnz, o_nnz parameters are specified, the storage is specified 2566 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2567 In the above case the values for d_nnz,o_nnz are: 2568 .vb 2569 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2570 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2571 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2572 .ve 2573 Here the space allocated is sum of all the above values i.e 34, and 2574 hence pre-allocation is perfect. 2575 2576 Level: intermediate 2577 2578 .keywords: matrix, aij, compressed row, sparse, parallel 2579 2580 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2581 MPIAIJ 2582 @*/ 2583 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2584 { 2585 PetscErrorCode ierr; 2586 PetscMPIInt size; 2587 2588 PetscFunctionBegin; 2589 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2590 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2591 if (size > 1) { 2592 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2593 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2594 } else { 2595 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2596 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2597 } 2598 PetscFunctionReturn(0); 2599 } 2600 2601 #undef __FUNCT__ 2602 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2603 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2604 { 2605 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2606 2607 PetscFunctionBegin; 2608 *Ad = a->A; 2609 *Ao = a->B; 2610 *colmap = a->garray; 2611 PetscFunctionReturn(0); 2612 } 2613 2614 #undef __FUNCT__ 2615 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2616 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2617 { 2618 PetscErrorCode ierr; 2619 PetscInt i; 2620 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2621 2622 PetscFunctionBegin; 2623 if (coloring->ctype == IS_COLORING_LOCAL) { 2624 ISColoringValue *allcolors,*colors; 2625 ISColoring ocoloring; 2626 2627 /* set coloring for diagonal portion */ 2628 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2629 2630 /* set coloring for off-diagonal portion */ 2631 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2632 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2633 for (i=0; i<a->B->n; i++) { 2634 colors[i] = allcolors[a->garray[i]]; 2635 } 2636 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2637 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2638 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2639 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2640 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2641 ISColoringValue *colors; 2642 PetscInt *larray; 2643 ISColoring ocoloring; 2644 2645 /* set coloring for diagonal portion */ 2646 ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2647 for (i=0; i<a->A->n; i++) { 2648 larray[i] = i + 
a->cstart; 2649 } 2650 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2651 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2652 for (i=0; i<a->A->n; i++) { 2653 colors[i] = coloring->colors[larray[i]]; 2654 } 2655 ierr = PetscFree(larray);CHKERRQ(ierr); 2656 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2657 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2658 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2659 2660 /* set coloring for off-diagonal portion */ 2661 ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2662 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2663 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2664 for (i=0; i<a->B->n; i++) { 2665 colors[i] = coloring->colors[larray[i]]; 2666 } 2667 ierr = PetscFree(larray);CHKERRQ(ierr); 2668 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2669 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2670 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2671 } else { 2672 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 2673 } 2674 2675 PetscFunctionReturn(0); 2676 } 2677 2678 #if defined(PETSC_HAVE_ADIC) 2679 #undef __FUNCT__ 2680 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2681 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2682 { 2683 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2684 PetscErrorCode ierr; 2685 2686 PetscFunctionBegin; 2687 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2688 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2689 PetscFunctionReturn(0); 2690 } 2691 #endif 2692 2693 #undef __FUNCT__ 2694 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2695 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 2696 { 2697 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2698 PetscErrorCode ierr; 2699 2700 PetscFunctionBegin; 2701 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2702 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2703 PetscFunctionReturn(0); 2704 } 2705 2706 #undef __FUNCT__ 2707 #define __FUNCT__ "MatMerge" 2708 /*@C 2709 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2710 matrices from each processor 2711 2712 Collective on MPI_Comm 2713 2714 Input Parameters: 2715 + comm - the communicators the parallel matrix will live on 2716 . inmat - the input sequential matrices 2717 . n - number of local columns (or PETSC_DECIDE) 2718 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2719 2720 Output Parameter: 2721 . outmat - the parallel matrix generated 2722 2723 Level: advanced 2724 2725 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
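
   Typical usage (a sketch):
.vb
      MatMerge(comm,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);
.ve
   The input matrix inmat is destroyed by this routine once the parallel
   matrix has been assembled.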
2726 2727 @*/ 2728 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2729 { 2730 PetscErrorCode ierr; 2731 PetscInt m,N,i,rstart,nnz,I,*dnz,*onz; 2732 PetscInt *indx; 2733 PetscScalar *values; 2734 PetscMap columnmap,rowmap; 2735 2736 PetscFunctionBegin; 2737 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2738 /* 2739 PetscMPIInt rank; 2740 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2741 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N); 2742 */ 2743 if (scall == MAT_INITIAL_MATRIX){ 2744 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2745 if (n == PETSC_DECIDE){ 2746 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2747 ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr); 2748 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2749 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2750 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2751 } 2752 2753 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2754 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2755 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2756 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2757 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2758 2759 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2760 for (i=0;i<m;i++) { 2761 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2762 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2763 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2764 } 2765 /* This routine will ONLY return MPIAIJ type matrix */ 2766 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr); 2767 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2768 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2769 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2770 2771 } else if (scall == MAT_REUSE_MATRIX){ 2772 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2773 } else { 2774 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 2775 } 2776 2777 for (i=0;i<m;i++) { 2778 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2779 I = i + rstart; 2780 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2781 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2782 } 2783 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2784 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2785 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2786 2787 PetscFunctionReturn(0); 2788 } 2789 2790 #undef __FUNCT__ 2791 #define __FUNCT__ "MatFileSplit" 2792 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2793 { 2794 PetscErrorCode ierr; 2795 PetscMPIInt rank; 2796 PetscInt m,N,i,rstart,nnz; 2797 size_t len; 2798 const PetscInt *indx; 2799 PetscViewer out; 2800 char *name; 2801 Mat B; 2802 const PetscScalar *values; 2803 2804 PetscFunctionBegin; 2805 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2806 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2807 /* Should this be the type of the diagonal block of A? 
*/ 2808 ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr); 2809 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2810 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2811 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2812 for (i=0;i<m;i++) { 2813 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2814 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2815 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2816 } 2817 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2818 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2819 2820 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2821 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2822 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2823 sprintf(name,"%s.%d",outfile,rank); 2824 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr); 2825 ierr = PetscFree(name); 2826 ierr = MatView(B,out);CHKERRQ(ierr); 2827 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2828 ierr = MatDestroy(B);CHKERRQ(ierr); 2829 PetscFunctionReturn(0); 2830 } 2831 2832 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 2833 #undef __FUNCT__ 2834 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 2835 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 2836 { 2837 PetscErrorCode ierr; 2838 Mat_Merge_SeqsToMPI *merge; 2839 PetscObjectContainer container; 2840 2841 PetscFunctionBegin; 2842 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2843 if (container) { 2844 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2845 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 2846 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 2847 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 2848 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 2849 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 2850 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 2851 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 2852 ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr); 2853 if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);} 2854 if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);} 2855 if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);} 2856 2857 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 2858 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 2859 } 2860 ierr = PetscFree(merge);CHKERRQ(ierr); 2861 2862 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 2863 PetscFunctionReturn(0); 2864 } 2865 2866 #include "src/mat/utils/freespace.h" 2867 #include "petscbt.h" 2868 #undef __FUNCT__ 2869 #define __FUNCT__ "MatMerge_SeqsToMPI" 2870 /*@C 2871 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 2872 matrices from each processor 2873 2874 Collective on MPI_Comm 2875 2876 Input Parameters: 2877 + comm - the communicators the parallel matrix will live on 2878 . seqmat - the input sequential matrices 2879 . m - number of local rows (or PETSC_DECIDE) 2880 . n - number of local columns (or PETSC_DECIDE) 2881 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2882 2883 Output Parameter: 2884 . mpimat - the parallel matrix generated 2885 2886 Level: advanced 2887 2888 Notes: 2889 The dimensions of the sequential matrix in each processor MUST be the same. 2890 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 2891 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 
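
   Typical usage (a sketch):
.vb
      MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_INITIAL_MATRIX,&mpimat);
.ve
   The symbolic and numeric phases may also be called separately through
   MatMerge_SeqsToMPISymbolic() and MatMerge_SeqsToMPINumeric(); the
   MAT_REUSE_MATRIX path of this routine skips the symbolic phase and only
   repeats the numeric one.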
2892 @*/ 2893 static PetscEvent logkey_seqstompinum = 0; 2894 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 2895 { 2896 PetscErrorCode ierr; 2897 MPI_Comm comm=mpimat->comm; 2898 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2899 PetscMPIInt size,rank,taga,*len_s; 2900 PetscInt N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j; 2901 PetscInt proc,m; 2902 PetscInt **buf_ri,**buf_rj; 2903 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 2904 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 2905 MPI_Request *s_waits,*r_waits; 2906 MPI_Status *status; 2907 MatScalar *aa=a->a,**abuf_r,*ba_i; 2908 Mat_Merge_SeqsToMPI *merge; 2909 PetscObjectContainer container; 2910 2911 PetscFunctionBegin; 2912 if (!logkey_seqstompinum) { 2913 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 2914 } 2915 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 2916 2917 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2918 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2919 2920 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2921 if (container) { 2922 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2923 } 2924 bi = merge->bi; 2925 bj = merge->bj; 2926 buf_ri = merge->buf_ri; 2927 buf_rj = merge->buf_rj; 2928 2929 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 2930 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 2931 len_s = merge->len_s; 2932 2933 /* send and recv matrix values */ 2934 /*-----------------------------*/ 2935 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 2936 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 2937 2938 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 2939 for (proc=0,k=0; proc<size; proc++){ 2940 if (!len_s[proc]) continue; 2941 i = owners[proc]; 2942 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 2943 k++; 2944 } 2945 2946 ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr); 2947 ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr); 2948 ierr = PetscFree(status);CHKERRQ(ierr); 2949 2950 ierr = PetscFree(s_waits);CHKERRQ(ierr); 2951 ierr = PetscFree(r_waits);CHKERRQ(ierr); 2952 2953 /* insert mat values of mpimat */ 2954 /*----------------------------*/ 2955 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 2956 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 2957 nextrow = buf_ri_k + merge->nrecv; 2958 nextai = nextrow + merge->nrecv; 2959 2960 for (k=0; k<merge->nrecv; k++){ 2961 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 2962 nrows = *(buf_ri_k[k]); 2963 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 2964 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 2965 } 2966 2967 /* set values of ba */ 2968 ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); 2969 for (i=0; i<m; i++) { 2970 arow = owners[rank] + i; 2971 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 2972 bnzi = bi[i+1] - bi[i]; 2973 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 2974 2975 /* add local non-zero vals of this proc's seqmat into ba */ 2976 anzi = ai[arow+1] - ai[arow]; 2977 aj = a->j + ai[arow]; 2978 aa = a->a + ai[arow]; 2979 nextaj = 0; 2980 for 
(j=0; nextaj<anzi; j++){ 2981 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 2982 ba_i[j] += aa[nextaj++]; 2983 } 2984 } 2985 2986 /* add received vals into ba */ 2987 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 2988 /* i-th row */ 2989 if (i == *nextrow[k]) { 2990 anzi = *(nextai[k]+1) - *nextai[k]; 2991 aj = buf_rj[k] + *(nextai[k]); 2992 aa = abuf_r[k] + *(nextai[k]); 2993 nextaj = 0; 2994 for (j=0; nextaj<anzi; j++){ 2995 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 2996 ba_i[j] += aa[nextaj++]; 2997 } 2998 } 2999 nextrow[k]++; nextai[k]++; 3000 } 3001 } 3002 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3003 } 3004 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3005 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3006 3007 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3008 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3009 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3010 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3011 PetscFunctionReturn(0); 3012 } 3013 static PetscEvent logkey_seqstompisym = 0; 3014 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3015 { 3016 PetscErrorCode ierr; 3017 Mat B_mpi; 3018 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3019 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3020 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3021 PetscInt M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j; 3022 PetscInt len,proc,*dnz,*onz; 3023 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3024 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3025 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3026 MPI_Status *status; 3027 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3028 PetscBT lnkbt; 3029 Mat_Merge_SeqsToMPI *merge; 3030 PetscObjectContainer container; 3031 3032 PetscFunctionBegin; 3033 if (!logkey_seqstompisym) { 3034 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3035 } 3036 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3037 3038 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3039 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3040 3041 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3042 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3043 3044 /* determine row ownership */ 3045 /*---------------------------------------------------------*/ 3046 ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr); 3047 if (m == PETSC_DECIDE) { 3048 ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr); 3049 } else { 3050 ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 3051 } 3052 ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr); 3053 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3054 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3055 3056 if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); } 3057 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 3058 3059 /* determine the number of messages to send, their lengths */ 3060 /*---------------------------------------------------------*/ 3061 len_s = merge->len_s; 3062 3063 len = 0; /* length of buf_si[] */ 3064 merge->nsend = 0; 3065 for (proc=0; proc<size; proc++){ 3066 len_si[proc] = 0; 3067 if (proc == rank){ 3068 len_s[proc] = 0; 3069 } else { 3070 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3071 
len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3072 } 3073 if (len_s[proc]) { 3074 merge->nsend++; 3075 nrows = 0; 3076 for (i=owners[proc]; i<owners[proc+1]; i++){ 3077 if (ai[i+1] > ai[i]) nrows++; 3078 } 3079 len_si[proc] = 2*(nrows+1); 3080 len += len_si[proc]; 3081 } 3082 } 3083 3084 /* determine the number and length of messages to receive for ij-structure */ 3085 /*-------------------------------------------------------------------------*/ 3086 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3087 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3088 3089 /* post the Irecv of j-structure */ 3090 /*-------------------------------*/ 3091 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr); 3092 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3093 3094 /* post the Isend of j-structure */ 3095 /*--------------------------------*/ 3096 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3097 sj_waits = si_waits + merge->nsend; 3098 3099 for (proc=0, k=0; proc<size; proc++){ 3100 if (!len_s[proc]) continue; 3101 i = owners[proc]; 3102 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3103 k++; 3104 } 3105 3106 /* receives and sends of j-structure are complete */ 3107 /*------------------------------------------------*/ 3108 ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr); 3109 ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr); 3110 3111 /* send and recv i-structure */ 3112 /*---------------------------*/ 3113 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr); 3114 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3115 3116 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3117 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3118 for (proc=0,k=0; proc<size; proc++){ 3119 if (!len_s[proc]) continue; 3120 /* form outgoing message for i-structure: 3121 buf_si[0]: nrows to be sent 3122 [1:nrows]: row index (global) 3123 [nrows+1:2*nrows+1]: i-structure index 3124 */ 3125 /*-------------------------------------------*/ 3126 nrows = len_si[proc]/2 - 1; 3127 buf_si_i = buf_si + nrows+1; 3128 buf_si[0] = nrows; 3129 buf_si_i[0] = 0; 3130 nrows = 0; 3131 for (i=owners[proc]; i<owners[proc+1]; i++){ 3132 anzi = ai[i+1] - ai[i]; 3133 if (anzi) { 3134 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3135 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3136 nrows++; 3137 } 3138 } 3139 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3140 k++; 3141 buf_si += len_si[proc]; 3142 } 3143 3144 ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr); 3145 ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr); 3146 3147 ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 3148 for (i=0; i<merge->nrecv; i++){ 3149 ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 3150 } 3151 3152 ierr = PetscFree(len_si);CHKERRQ(ierr); 3153 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3154 ierr = 
    nrows = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++){
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc];         /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);

  ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++){
    ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree(si_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len = 0;
  len = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow = buf_ri_k + merge->nrecv;
  nextai  = nextrow + merge->nrecv;
  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k];                 /* beginning of k-th received i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len = 0;
  for (i=0;i<m;i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
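  /* at this point bi[0..m] and bj[] hold the CSR ij-structure of the merged local rows,
     and dnz/onz hold the per-row diagonal/off-diagonal preallocation counts */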

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  if (n==PETSC_DECIDE) {
    ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr);
  }
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = PETSC_NULL;
  merge->coj          = PETSC_NULL;
  merge->owners_co    = PETSC_NULL;

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  *mpimat = B_mpi;
  ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscEvent logkey_seqstompi = 0;
PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!logkey_seqstompi) {
    ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);
  }
  ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
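
/*
   A minimal usage sketch for the merge above (illustrative only; the names 'seqA' and
   'comm' are hypothetical, and each process is assumed to contribute a MATSEQAIJ matrix
   of the same global size whose overlapping entries are summed):

     Mat seqA,C;
     ...                            (each process assembles its sequential contribution seqA)
     ierr = MatMerge_SeqsToMPI(comm,seqA,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     ...                            (later, same nonzero pattern but new values in seqA)
     ierr = MatMerge_SeqsToMPI(comm,seqA,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
*/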
static PetscEvent logkey_getlocalmat = 0;
#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMat"
/*@C
   MatGetLocalMat - Creates a SeqAIJ matrix from the local rows of a parallel matrix

   Not Collective

   Input Parameters:
+  A - the matrix
-  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.  A_loc - the local sequential matrix generated

   Level: developer

@*/
PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
  PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
  PetscScalar    *aa=a->a,*ba=b->a,*ca;
  PetscInt       am=A->m,i,j,k,cstart=mpimat->cstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;

  PetscFunctionBegin;
  if (!logkey_getlocalmat) {
    ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);
  }
  ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++){
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
    k = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->freedata = PETSC_TRUE;
    mat->nonew    = 0;
  } else if (scall == MAT_REUSE_MATRIX){
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    ci = mat->i; cj = mat->j; ca = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *ca++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *ca++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *ca++ = *ba++; bj++;
      }
    }
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
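
/*
   A minimal usage sketch (illustrative only; 'A' is assumed to be an assembled MATMPIAIJ matrix):

     Mat A_loc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);   (first call builds A_loc)
     ...
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);     (later calls refresh the values)
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/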

static PetscEvent logkey_getlocalmatcondensed = 0;
#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMatCondensed"
/*@C
   MatGetLocalMatCondensed - Creates a SeqAIJ matrix from the local rows and the NONZERO columns of a parallel matrix

   Not Collective

   Input Parameters:
+  A - the matrix
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  row, col - index sets of rows and columns to extract (or PETSC_NULL)

   Output Parameter:
.  A_loc - the local sequential matrix generated

   Level: developer

@*/
PetscErrorCode MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;

  PetscFunctionBegin;
  if (!logkey_getlocalmatcondensed) {
    ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);
  }
  ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row){
    start = a->rstart; end = a->rend;
    ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col){
    start = a->cstart;
    cmap  = a->garray;
    nzA   = a->A->n;
    nzB   = a->B->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX){
    ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr = PetscFree(aloc);CHKERRQ(ierr);
  if (!row){
    ierr = ISDestroy(isrowa);CHKERRQ(ierr);
  }
  if (!col){
    ierr = ISDestroy(iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscEvent logkey_GetBrowsOfAcols = 0;
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
   MatGetBrowsOfAcols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns of the local part of A

   Collective on Mat

   Input Parameters:
+  A,B - the matrices in mpiaij format
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)

   Output Parameters:
+  rowb, colb - index sets of rows and columns of B that were extracted
.  brstart - row index of B_seq at which B's own local rows (the next B->m rows) begin
-  B_seq - the sequential matrix generated

   Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq;

  PetscFunctionBegin;
  if (a->cstart != b->rstart || a->cend != b->rend){
    SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
  }
  if (!logkey_GetBrowsOfAcols) {
    ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);
  }
  ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    start = a->cstart;
    cmap  = a->garray;
    nzA   = a->A->n;
    nzB   = a->B->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
    *brstart = imark;
    ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb){
    ierr = ISDestroy(isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb){
    ierr = ISDestroy(iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
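
/*
   Illustrative example of the row selection above (values are hypothetical): with
   cstart = 4, nzA = 3 local columns, and garray = {1,2,7}, the gathered rows of B are
   {1,2} (off-process, below cstart), {4,5,6} (local), and {7} (off-process, above),
   i.e. idx = {1,2,4,5,6,7} and brstart = imark = 2.
*/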

static PetscEvent logkey_GetBrowsOfAocols = 0;
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols"
/*@C
   MatGetBrowsOfAoCols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns
   of the OFF-DIAGONAL portion of the local part of A

   Collective on Mat

   Input Parameters:
+  A,B - the matrices in mpiaij format
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
.  startsj - starting points in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
-  bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)

   Output Parameter:
.  B_oth - the sequential matrix generated

   Level: developer

@*/
PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
{
  VecScatter_MPI_General *gen_to,*gen_from;
  PetscErrorCode         ierr;
  Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
  Mat_SeqAIJ             *b_oth;
  VecScatter             ctx=a->Mvctx;
  MPI_Comm               comm=ctx->comm;
  PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
  PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj;
  PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
  PetscInt               i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
  MPI_Request            *rwaits,*swaits;
  MPI_Status             *sstatus,rstatus;
  PetscInt               *cols;
  PetscScalar            *vals;
  PetscMPIInt            j;

  PetscFunctionBegin;
  if (a->cstart != b->rstart || a->cend != b->rend){
    SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend);
  }
  if (!logkey_GetBrowsOfAocols) {
    ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);
  }
  ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  gen_to   = (VecScatter_MPI_General*)ctx->todata;
  gen_from = (VecScatter_MPI_General*)ctx->fromdata;
  rvalues  = gen_from->values; /* holds the lengths of the rows to be received */
  svalues  = gen_to->values;   /* holds the lengths of the rows to be sent */
  nrecvs   = gen_from->n;
  nsends   = gen_to->n;
  rwaits   = gen_from->requests;
  swaits   = gen_to->requests;
  srow     = gen_to->indices;  /* local row index to be sent */
  rstarts  = gen_from->starts;
  sstarts  = gen_to->starts;
  rprocs   = gen_from->procs;
  sprocs   = gen_to->procs;
  sstatus  = gen_to->sstatus;

  if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX){
    /* i-array */
    /*---------*/
    /* post receives */
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i];
      nrows  = rstarts[i+1]-rstarts[i];
      ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message */
    ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
    rstartsj = sstartsj + nsends +1;
    sstartsj[0] = 0;  rstartsj[0] = 0;
    len = 0; /* total length of j or a array to be sent */
    k = 0;
    for (i=0; i<nsends; i++){
      rowlen = (PetscInt*)svalues + sstarts[i];
      nrows  = sstarts[i+1]-sstarts[i]; /* num of rows */
      for (j=0; j<nrows; j++) {
        row  = srow[k] + b->rowners[rank]; /* global row idx */
        ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* row length */
        len += rowlen[j];
        ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
      sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {
      ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
    }
    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
    b_othi[0] = 0;
    len = 0; /* total length of j or a array to be received */
    k = 0;
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i];
      nrows  = rstarts[i+1]-rstarts[i];
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++){
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }
    k = 0;
    for (i=0; i<nsends; i++){
      nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row  = srow[k++] + b->rowners[rank]; /* global row idx */
        ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
        for (l=0; l<ncols; l++){
          *bufJ++ = cols[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {
      ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
    }
  } else if (scall == MAT_REUSE_MATRIX){
    sstartsj = *startsj;
    rstartsj = sstartsj + nsends +1;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Invalid MatReuse option");
  }

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++){
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }
  k = 0;
  for (i=0; i<nsends; i++){
    nrows = sstarts[i+1]-sstarts[i];
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row  = srow[k++] + b->rowners[rank]; /* global row idx */
      ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
      for (l=0; l<ncols; l++){
        *bufA++ = vals[l];
      }
      ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {
    ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
  }

  if (scall == MAT_INITIAL_MATRIX){
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
    b_oth->freedata = PETSC_TRUE;
    b_oth->nonew    = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj || !bufa_ptr){
      ierr = PetscFree(sstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
    } else {
      *startsj  = sstartsj;
      *bufa_ptr = bufa;
    }
  }
  ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJ()
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscInt       i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);

  ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data      = (void*)b;
  ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->factor    = 0;
  B->bs        = 1;
  B->assembled = PETSC_FALSE;
  B->mapping   = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
  b->cowners = b->rowners + b->size + 2;
  ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* Explicitly create 2 MATSEQAIJ matrices. */
  ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
  ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
  ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPIAIJ",
                                           MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPIAIJ",
                                           MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIAIJ",
                                           MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                           "MatIsTranspose_MPIAIJ",
                                           MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                           "MatMPIAIJSetPreallocation_MPIAIJ",
                                           MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
                                           "MatMPIAIJSetPreallocationCSR_MPIAIJ",
                                           MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                           "MatDiagonalScaleLocal_MPIAIJ",
                                           MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
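
/*
   Minimal creation sketch for a MATMPIAIJ matrix (illustrative only; the communicator
   'comm', the sizes M and N, and the preallocation values are example assumptions):

     Mat A;
     ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);

   Alternatively, -mat_type mpiaij with MatSetFromOptions() selects this type at runtime.
*/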