1 2 #include "src/mat/impls/aij/mpi/mpiaij.h" 3 #include "src/inline/spops.h" 4 5 /* 6 Local utility routine that creates a mapping from the global column 7 number to the local number in the off-diagonal part of the local 8 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 9 a slightly higher hash table cost; without it it is not scalable (each processor 10 has an order N integer array but is fast to acess. 11 */ 12 #undef __FUNCT__ 13 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 14 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 15 { 16 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 17 PetscErrorCode ierr; 18 PetscInt n = aij->B->n,i; 19 20 PetscFunctionBegin; 21 #if defined (PETSC_USE_CTABLE) 22 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 23 for (i=0; i<n; i++){ 24 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 25 } 26 #else 27 ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 28 PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt)); 29 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr); 30 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 31 #endif 32 PetscFunctionReturn(0); 33 } 34 35 #define CHUNKSIZE 15 36 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 37 { \ 38 \ 39 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 40 rmax = aimax[row]; nrow = ailen[row]; \ 41 col1 = col - shift; \ 42 \ 43 low = 0; high = nrow; \ 44 while (high-low > 5) { \ 45 t = (low+high)/2; \ 46 if (rp[t] > col) high = t; \ 47 else low = t; \ 48 } \ 49 for (_i=low; _i<high; _i++) { \ 50 if (rp[_i] > col1) break; \ 51 if (rp[_i] == col1) { \ 52 if (addv == ADD_VALUES) ap[_i] += value; \ 53 else ap[_i] = value; \ 54 goto a_noinsert; \ 55 } \ 56 } \ 57 if (nonew == 1) goto a_noinsert; \ 58 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 59 if (nrow >= rmax) { \ 60 /* there is no extra room in row, therefore enlarge */ \ 61 PetscInt new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \ 62 PetscScalar *new_a; \ 63 \ 64 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \ 65 \ 66 /* malloc new storage space */ \ 67 len = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(am+1)*sizeof(PetscInt); \ 68 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 69 new_j = (PetscInt*)(new_a + new_nz); \ 70 new_i = new_j + new_nz; \ 71 \ 72 /* copy over old data into new slots */ \ 73 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \ 74 for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 75 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \ 76 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 77 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 78 len*sizeof(PetscInt));CHKERRQ(ierr); \ 79 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 80 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 81 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 82 /* free up old matrix storage */ \ 83 \ 84 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 85 if (!a->singlemalloc) { \ 86 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 87 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 88 } \ 89 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 90 a->singlemalloc = PETSC_TRUE; \ 91 \ 92 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 93 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 94 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \ 95 a->maxnz += CHUNKSIZE; \ 96 a->reallocs++; \ 97 } \ 98 N = nrow++ - 1; a->nz++; \ 99 /* shift up all the later entries in this row */ \ 100 for (ii=N; ii>=_i; ii--) { \ 101 rp[ii+1] = rp[ii]; \ 102 ap[ii+1] = ap[ii]; \ 103 } \ 104 rp[_i] = col1; \ 105 ap[_i] = value; \ 106 a_noinsert: ; \ 107 ailen[row] = nrow; \ 108 } 109 110 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 111 { \ 112 \ 113 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 114 rmax = bimax[row]; nrow = bilen[row]; \ 115 col1 = col - shift; \ 116 \ 117 low = 0; high = nrow; \ 118 while (high-low > 5) { \ 119 t = (low+high)/2; \ 120 if (rp[t] > col) high = t; \ 121 else low = t; \ 122 } \ 123 for (_i=low; _i<high; _i++) { \ 124 if (rp[_i] > col1) break; \ 125 if (rp[_i] == col1) { \ 126 if (addv == ADD_VALUES) ap[_i] += value; \ 127 else ap[_i] = value; \ 128 goto b_noinsert; \ 129 } \ 130 } \ 131 if (nonew == 1) goto b_noinsert; \ 132 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 133 if (nrow >= rmax) { \ 134 /* there is no extra room in row, therefore enlarge */ \ 135 PetscInt new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 136 PetscScalar *new_a; \ 137 \ 138 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \ 139 \ 140 /* malloc new storage space */ \ 141 len = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(bm+1)*sizeof(PetscInt); \ 142 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 143 new_j = (PetscInt*)(new_a + new_nz); \ 144 new_i = new_j + new_nz; \ 145 \ 146 /* copy over old data into new slots */ \ 147 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 148 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 149 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \ 150 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 151 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 152 len*sizeof(PetscInt));CHKERRQ(ierr); \ 153 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 154 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 155 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 156 /* free up old matrix storage */ \ 157 \ 158 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 159 if (!b->singlemalloc) { \ 160 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 161 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 162 } \ 163 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 164 b->singlemalloc = PETSC_TRUE; \ 165 \ 166 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 167 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 168 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \ 169 b->maxnz += CHUNKSIZE; \ 170 b->reallocs++; \ 171 } \ 172 N = nrow++ - 1; b->nz++; \ 173 /* shift up all the later entries in this row */ \ 174 for (ii=N; ii>=_i; ii--) { \ 175 rp[ii+1] = rp[ii]; \ 176 ap[ii+1] = ap[ii]; \ 177 } \ 178 rp[_i] = col1; \ 179 ap[_i] = value; \ 180 b_noinsert: ; \ 181 bilen[row] = nrow; \ 182 } 183 184 #undef __FUNCT__ 185 #define __FUNCT__ "MatSetValues_MPIAIJ" 186 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 187 { 188 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 189 PetscScalar value; 190 PetscErrorCode ierr; 191 PetscInt i,j,rstart = aij->rstart,rend = aij->rend; 192 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 193 PetscTruth roworiented = aij->roworiented; 194 195 /* Some Variables required in the macro */ 196 Mat A = aij->A; 197 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 198 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 199 PetscScalar *aa = a->a; 200 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 201 Mat B = aij->B; 202 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 203 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 204 PetscScalar *ba = b->a; 205 206 PetscInt *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 207 PetscInt nonew = a->nonew,shift=0; 208 PetscScalar *ap; 209 210 PetscFunctionBegin; 211 for (i=0; i<m; i++) { 212 if (im[i] < 0) continue; 213 #if defined(PETSC_USE_BOPT_g) 214 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 215 #endif 216 if (im[i] >= rstart && im[i] < rend) { 217 row = im[i] - rstart; 218 for (j=0; j<n; j++) { 219 if (in[j] >= cstart && in[j] < cend){ 220 col = in[j] - cstart; 221 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 222 if (ignorezeroentries && value == 0.0) continue; 223 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 224 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 225 } else if (in[j] < 0) continue; 226 #if defined(PETSC_USE_BOPT_g) 227 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);} 228 #endif 229 else { 230 if (mat->was_assembled) { 231 if (!aij->colmap) { 232 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 233 } 234 #if defined (PETSC_USE_CTABLE) 235 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 236 col--; 237 #else 238 col = aij->colmap[in[j]] - 1; 239 #endif 240 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 241 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 242 col = in[j]; 243 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 244 B = aij->B; 245 b = (Mat_SeqAIJ*)B->data; 246 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 247 ba = b->a; 248 } 249 } else col = in[j]; 250 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 251 if (ignorezeroentries && value == 0.0) continue; 252 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 253 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 254 } 255 } 256 } else { 257 if (!aij->donotstash) { 258 if (roworiented) { 259 if (ignorezeroentries && v[i*n] == 0.0) continue; 260 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 261 } else { 262 if (ignorezeroentries && v[i] == 0.0) continue; 263 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 264 } 265 } 266 } 267 } 268 PetscFunctionReturn(0); 269 } 270 271 #undef __FUNCT__ 272 #define __FUNCT__ "MatGetValues_MPIAIJ" 273 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 274 { 275 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 276 PetscErrorCode ierr; 277 PetscInt i,j,rstart = aij->rstart,rend = aij->rend; 278 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 279 280 PetscFunctionBegin; 281 for (i=0; i<m; i++) { 282 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 283 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1); 284 if (idxm[i] >= rstart && idxm[i] < rend) { 285 row = idxm[i] - rstart; 286 for (j=0; j<n; j++) { 287 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 288 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1); 289 if (idxn[j] >= cstart && idxn[j] < cend){ 290 col = idxn[j] - cstart; 291 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 292 } else { 293 if (!aij->colmap) { 294 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 295 } 296 #if defined (PETSC_USE_CTABLE) 297 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 298 col --; 299 #else 300 col = aij->colmap[idxn[j]] - 1; 301 #endif 302 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 303 else { 304 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 305 } 306 } 307 } 308 } else { 309 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 310 } 311 } 312 PetscFunctionReturn(0); 313 } 314 315 #undef __FUNCT__ 316 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 317 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 318 { 319 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 320 PetscErrorCode ierr; 321 PetscInt nstash,reallocs; 322 InsertMode addv; 323 324 PetscFunctionBegin; 325 if (aij->donotstash) { 326 PetscFunctionReturn(0); 327 } 328 329 /* make sure all processors are either in INSERTMODE or ADDMODE */ 330 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 331 if (addv == (ADD_VALUES|INSERT_VALUES)) { 332 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 333 } 334 mat->insertmode = addv; /* in case this processor had no cache */ 335 336 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 337 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 338 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs); 339 PetscFunctionReturn(0); 340 } 341 342 #undef __FUNCT__ 343 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 344 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 345 { 346 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 347 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 348 PetscErrorCode ierr; 349 PetscMPIInt n; 350 PetscInt i,j,rstart,ncols,flg; 351 PetscInt *row,*col,other_disassembled; 352 PetscScalar *val; 353 InsertMode addv = mat->insertmode; 354 355 PetscFunctionBegin; 356 if (!aij->donotstash) { 357 while (1) { 358 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 359 if (!flg) break; 360 361 for (i=0; i<n;) { 362 /* Now identify the consecutive vals belonging to the same row */ 363 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 364 if (j < n) ncols = j-i; 365 else ncols = n-i; 366 /* Now assemble all these values with a single function call */ 367 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 368 i = j; 369 } 370 } 371 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 372 } 373 374 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 375 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 376 377 /* determine if any processor has disassembled, if so we must 378 also disassemble ourselfs, in order that we may reassemble. */ 379 /* 380 if nonzero structure of submatrix B cannot change then we know that 381 no processor disassembled thus we can skip this stuff 382 */ 383 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 384 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 385 if (mat->was_assembled && !other_disassembled) { 386 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 387 /* reaccess the b because aij->B was changed */ 388 b = (Mat_SeqAIJ *)aij->B->data; 389 } 390 } 391 392 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 393 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 394 } 395 396 b->inode.use = PETSC_FALSE; 397 b->compressedrow.use = PETSC_TRUE; 398 c->compressedrow.checked = PETSC_FALSE; 399 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 400 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 401 402 if (aij->rowvalues) { 403 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 404 aij->rowvalues = 0; 405 } 406 407 /* used by MatAXPY() */ 408 a->xtoy = 0; b->xtoy = 0; 409 a->XtoY = 0; b->XtoY = 0; 410 411 PetscFunctionReturn(0); 412 } 413 414 #undef __FUNCT__ 415 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 416 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 417 { 418 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 419 PetscErrorCode ierr; 420 421 PetscFunctionBegin; 422 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 423 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 424 PetscFunctionReturn(0); 425 } 426 427 #undef __FUNCT__ 428 #define __FUNCT__ "MatZeroRows_MPIAIJ" 429 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag) 430 { 431 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 432 PetscErrorCode ierr; 433 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = A->tag; 434 PetscInt i,N,*rows,*owners = l->rowners; 435 PetscInt *nprocs,j,idx,nsends,row; 436 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 437 PetscInt *rvalues,count,base,slen,*source; 438 PetscInt *lens,*lrows,*values,rstart=l->rstart; 439 MPI_Comm comm = A->comm; 440 MPI_Request *send_waits,*recv_waits; 441 MPI_Status recv_status,*send_status; 442 IS istmp; 443 PetscTruth found; 444 445 PetscFunctionBegin; 446 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 447 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 448 449 /* first count number of contributors to each processor */ 450 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 451 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 452 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 453 for (i=0; i<N; i++) { 454 idx = rows[i]; 455 found = PETSC_FALSE; 456 for (j=0; j<size; j++) { 457 if (idx >= owners[j] && idx < owners[j+1]) { 458 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 459 } 460 } 461 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 462 } 463 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 464 465 /* inform other processors of number of messages and max length*/ 466 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 467 468 /* post receives: */ 469 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 470 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 471 for (i=0; i<nrecvs; i++) { 472 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 473 } 474 475 /* do sends: 476 1) starts[i] gives the starting index in svalues for stuff going to 477 the ith processor 478 */ 479 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 480 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 481 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 482 starts[0] = 0; 483 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 484 for (i=0; i<N; i++) { 485 svalues[starts[owner[i]]++] = rows[i]; 486 } 487 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 488 489 starts[0] = 0; 490 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 491 count = 0; 492 for (i=0; i<size; i++) { 493 if (nprocs[2*i+1]) { 494 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 495 } 496 } 497 ierr = PetscFree(starts);CHKERRQ(ierr); 498 499 base = owners[rank]; 500 501 /* wait on receives */ 502 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 503 source = lens + nrecvs; 504 count = nrecvs; slen = 0; 505 while (count) { 506 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 507 /* unpack receives into our local space */ 508 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 509 source[imdex] = recv_status.MPI_SOURCE; 510 lens[imdex] = n; 511 slen += n; 512 count--; 513 } 514 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 515 516 /* move the data into the send scatter */ 517 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 518 count = 0; 519 for (i=0; i<nrecvs; i++) { 520 values = rvalues + i*nmax; 521 for (j=0; j<lens[i]; j++) { 522 lrows[count++] = values[j] - base; 523 } 524 } 525 ierr = PetscFree(rvalues);CHKERRQ(ierr); 526 ierr = PetscFree(lens);CHKERRQ(ierr); 527 ierr = PetscFree(owner);CHKERRQ(ierr); 528 ierr = PetscFree(nprocs);CHKERRQ(ierr); 529 530 /* actually zap the local rows */ 531 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 532 PetscLogObjectParent(A,istmp); 533 534 /* 535 Zero the required rows. If the "diagonal block" of the matrix 536 is square and the user wishes to set the diagonal we use seperate 537 code so that MatSetValues() is not called for each diagonal allocating 538 new memory, thus calling lots of mallocs and slowing things down. 539 540 Contributed by: Mathew Knepley 541 */ 542 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 543 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 544 if (diag && (l->A->M == l->A->N)) { 545 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 546 } else if (diag) { 547 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 548 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 549 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 550 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 551 } 552 for (i = 0; i < slen; i++) { 553 row = lrows[i] + rstart; 554 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 555 } 556 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 557 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 558 } else { 559 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 560 } 561 ierr = ISDestroy(istmp);CHKERRQ(ierr); 562 ierr = PetscFree(lrows);CHKERRQ(ierr); 563 564 /* wait on sends */ 565 if (nsends) { 566 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 567 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 568 ierr = PetscFree(send_status);CHKERRQ(ierr); 569 } 570 ierr = PetscFree(send_waits);CHKERRQ(ierr); 571 ierr = PetscFree(svalues);CHKERRQ(ierr); 572 573 PetscFunctionReturn(0); 574 } 575 576 #undef __FUNCT__ 577 #define __FUNCT__ "MatMult_MPIAIJ" 578 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 579 { 580 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 581 PetscErrorCode ierr; 582 PetscInt nt; 583 584 PetscFunctionBegin; 585 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 586 if (nt != A->n) { 587 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt); 588 } 589 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 590 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 591 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 592 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 593 PetscFunctionReturn(0); 594 } 595 596 #undef __FUNCT__ 597 #define __FUNCT__ "MatMultAdd_MPIAIJ" 598 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 599 { 600 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 601 PetscErrorCode ierr; 602 603 PetscFunctionBegin; 604 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 605 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 606 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 607 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 608 PetscFunctionReturn(0); 609 } 610 611 #undef __FUNCT__ 612 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 613 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 614 { 615 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 616 PetscErrorCode ierr; 617 618 PetscFunctionBegin; 619 /* do nondiagonal part */ 620 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 621 /* send it on its way */ 622 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 623 /* do local part */ 624 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 625 /* receive remote parts: note this assumes the values are not actually */ 626 /* inserted in yy until the next line, which is true for my implementation*/ 627 /* but is not perhaps always true. */ 628 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 629 PetscFunctionReturn(0); 630 } 631 632 EXTERN_C_BEGIN 633 #undef __FUNCT__ 634 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 635 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth tol,PetscTruth *f) 636 { 637 MPI_Comm comm; 638 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 639 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 640 IS Me,Notme; 641 PetscErrorCode ierr; 642 PetscInt M,N,first,last,*notme,i; 643 PetscMPIInt size; 644 645 PetscFunctionBegin; 646 647 /* Easy test: symmetric diagonal block */ 648 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 649 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 650 if (!*f) PetscFunctionReturn(0); 651 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 652 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 653 if (size == 1) PetscFunctionReturn(0); 654 655 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 656 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 657 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 658 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 659 for (i=0; i<first; i++) notme[i] = i; 660 for (i=last; i<M; i++) notme[i-last+first] = i; 661 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 662 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 663 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 664 Aoff = Aoffs[0]; 665 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 666 Boff = Boffs[0]; 667 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 668 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 669 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 670 ierr = ISDestroy(Me);CHKERRQ(ierr); 671 ierr = ISDestroy(Notme);CHKERRQ(ierr); 672 673 PetscFunctionReturn(0); 674 } 675 EXTERN_C_END 676 677 #undef __FUNCT__ 678 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 679 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 680 { 681 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 682 PetscErrorCode ierr; 683 684 PetscFunctionBegin; 685 /* do nondiagonal part */ 686 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 687 /* send it on its way */ 688 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 689 /* do local part */ 690 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 691 /* receive remote parts: note this assumes the values are not actually */ 692 /* inserted in yy until the next line, which is true for my implementation*/ 693 /* but is not perhaps always true. */ 694 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 695 PetscFunctionReturn(0); 696 } 697 698 /* 699 This only works correctly for square matrices where the subblock A->A is the 700 diagonal block 701 */ 702 #undef __FUNCT__ 703 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 704 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 705 { 706 PetscErrorCode ierr; 707 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 708 709 PetscFunctionBegin; 710 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 711 if (a->rstart != a->cstart || a->rend != a->cend) { 712 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 713 } 714 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 715 PetscFunctionReturn(0); 716 } 717 718 #undef __FUNCT__ 719 #define __FUNCT__ "MatScale_MPIAIJ" 720 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A) 721 { 722 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 723 PetscErrorCode ierr; 724 725 PetscFunctionBegin; 726 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 727 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 728 PetscFunctionReturn(0); 729 } 730 731 #undef __FUNCT__ 732 #define __FUNCT__ "MatDestroy_MPIAIJ" 733 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 734 { 735 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 736 PetscErrorCode ierr; 737 738 PetscFunctionBegin; 739 #if defined(PETSC_USE_LOG) 740 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N); 741 #endif 742 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 743 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 744 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 745 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 746 #if defined (PETSC_USE_CTABLE) 747 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 748 #else 749 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 750 #endif 751 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 752 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 753 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 754 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 755 ierr = PetscFree(aij);CHKERRQ(ierr); 756 757 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 758 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 759 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 760 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 761 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 762 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 763 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 764 PetscFunctionReturn(0); 765 } 766 767 #undef __FUNCT__ 768 #define __FUNCT__ "MatView_MPIAIJ_Binary" 769 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 770 { 771 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 772 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 773 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 774 PetscErrorCode ierr; 775 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 776 int fd; 777 PetscInt nz,header[4],*row_lengths,*range,rlen,i; 778 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 779 PetscScalar *column_values; 780 781 PetscFunctionBegin; 782 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 783 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 784 nz = A->nz + B->nz; 785 if (!rank) { 786 header[0] = MAT_FILE_COOKIE; 787 header[1] = mat->M; 788 header[2] = mat->N; 789 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 790 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 791 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 792 /* get largest number of rows any processor has */ 793 rlen = mat->m; 794 ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr); 795 for (i=1; i<size; i++) { 796 rlen = PetscMax(rlen,range[i+1] - range[i]); 797 } 798 } else { 799 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 800 rlen = mat->m; 801 } 802 803 /* load up the local row counts */ 804 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 805 for (i=0; i<mat->m; i++) { 806 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 807 } 808 809 /* store the row lengths to the file */ 810 if (!rank) { 811 MPI_Status status; 812 ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 813 for (i=1; i<size; i++) { 814 rlen = range[i+1] - range[i]; 815 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 816 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 817 } 818 } else { 819 ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 820 } 821 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 822 823 /* load up the local column indices */ 824 nzmax = nz; /* )th processor needs space a largest processor needs */ 825 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 826 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 827 cnt = 0; 828 for (i=0; i<mat->m; i++) { 829 for (j=B->i[i]; j<B->i[i+1]; j++) { 830 if ( (col = garray[B->j[j]]) > cstart) break; 831 column_indices[cnt++] = col; 832 } 833 for (k=A->i[i]; k<A->i[i+1]; k++) { 834 column_indices[cnt++] = A->j[k] + cstart; 835 } 836 for (; j<B->i[i+1]; j++) { 837 column_indices[cnt++] = garray[B->j[j]]; 838 } 839 } 840 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 841 842 /* store the column indices to the file */ 843 if (!rank) { 844 MPI_Status status; 845 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 846 for (i=1; i<size; i++) { 847 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 848 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 849 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 850 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 851 } 852 } else { 853 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 854 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 855 } 856 ierr = PetscFree(column_indices);CHKERRQ(ierr); 857 858 /* load up the local column values */ 859 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 860 cnt = 0; 861 for (i=0; i<mat->m; i++) { 862 for (j=B->i[i]; j<B->i[i+1]; j++) { 863 if ( garray[B->j[j]] > cstart) break; 864 column_values[cnt++] = B->a[j]; 865 } 866 for (k=A->i[i]; k<A->i[i+1]; k++) { 867 column_values[cnt++] = A->a[k]; 868 } 869 for (; j<B->i[i+1]; j++) { 870 column_values[cnt++] = B->a[j]; 871 } 872 } 873 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 874 875 /* store the column values to the file */ 876 if (!rank) { 877 MPI_Status status; 878 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 879 for (i=1; i<size; i++) { 880 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 881 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 882 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 883 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 884 } 885 } else { 886 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 887 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 888 } 889 ierr = PetscFree(column_values);CHKERRQ(ierr); 890 PetscFunctionReturn(0); 891 } 892 893 #undef __FUNCT__ 894 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 895 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 896 { 897 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 898 PetscErrorCode ierr; 899 PetscMPIInt rank = aij->rank,size = aij->size; 900 PetscTruth isdraw,iascii,flg,isbinary; 901 PetscViewer sviewer; 902 PetscViewerFormat format; 903 904 PetscFunctionBegin; 905 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 906 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 907 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 908 if (iascii) { 909 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 910 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 911 MatInfo info; 912 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 913 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 914 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 915 if (flg) { 916 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 917 rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 918 } else { 919 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 920 rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 921 } 922 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 923 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 924 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 925 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 926 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 927 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 928 PetscFunctionReturn(0); 929 } else if (format == PETSC_VIEWER_ASCII_INFO) { 930 PetscFunctionReturn(0); 931 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 932 PetscFunctionReturn(0); 933 } 934 } else if (isbinary) { 935 if (size == 1) { 936 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 937 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 938 } else { 939 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 940 } 941 PetscFunctionReturn(0); 942 } else if (isdraw) { 943 PetscDraw draw; 944 PetscTruth isnull; 945 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 946 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 947 } 948 949 if (size == 1) { 950 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 951 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 952 } else { 953 /* assemble the entire matrix onto first processor. */ 954 Mat A; 955 Mat_SeqAIJ *Aloc; 956 PetscInt M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 957 PetscScalar *a; 958 959 if (!rank) { 960 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 961 } else { 962 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 963 } 964 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 965 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 966 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 967 PetscLogObjectParent(mat,A); 968 969 /* copy over the A part */ 970 Aloc = (Mat_SeqAIJ*)aij->A->data; 971 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 972 row = aij->rstart; 973 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 974 for (i=0; i<m; i++) { 975 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 976 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 977 } 978 aj = Aloc->j; 979 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 980 981 /* copy over the B part */ 982 Aloc = (Mat_SeqAIJ*)aij->B->data; 983 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 984 row = aij->rstart; 985 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 986 ct = cols; 987 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 988 for (i=0; i<m; i++) { 989 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 990 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 991 } 992 ierr = PetscFree(ct);CHKERRQ(ierr); 993 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 994 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 995 /* 996 Everyone has to call to draw the matrix since the graphics waits are 997 synchronized across all processors that share the PetscDraw object 998 */ 999 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1000 if (!rank) { 1001 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 1002 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1003 } 1004 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1005 ierr = MatDestroy(A);CHKERRQ(ierr); 1006 } 1007 PetscFunctionReturn(0); 1008 } 1009 1010 #undef __FUNCT__ 1011 #define __FUNCT__ "MatView_MPIAIJ" 1012 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1013 { 1014 PetscErrorCode ierr; 1015 PetscTruth iascii,isdraw,issocket,isbinary; 1016 1017 PetscFunctionBegin; 1018 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1019 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1020 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1021 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1022 if (iascii || isdraw || isbinary || issocket) { 1023 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1024 } else { 1025 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1026 } 1027 PetscFunctionReturn(0); 1028 } 1029 1030 1031 1032 #undef __FUNCT__ 1033 #define __FUNCT__ "MatRelax_MPIAIJ" 1034 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1035 { 1036 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1037 PetscErrorCode ierr; 1038 Vec bb1; 1039 PetscScalar mone=-1.0; 1040 1041 PetscFunctionBegin; 1042 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1043 1044 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1045 1046 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1047 if (flag & SOR_ZERO_INITIAL_GUESS) { 1048 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1049 its--; 1050 } 1051 1052 while (its--) { 1053 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1054 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1055 1056 /* update rhs: bb1 = bb - B*x */ 1057 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1058 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1059 1060 /* local sweep */ 1061 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1062 CHKERRQ(ierr); 1063 } 1064 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1065 if (flag & SOR_ZERO_INITIAL_GUESS) { 1066 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1067 its--; 1068 } 1069 while (its--) { 1070 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1071 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1072 1073 /* update rhs: bb1 = bb - B*x */ 1074 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1075 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1076 1077 /* local sweep */ 1078 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1079 CHKERRQ(ierr); 1080 } 1081 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1082 if (flag & SOR_ZERO_INITIAL_GUESS) { 1083 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1084 its--; 1085 } 1086 while (its--) { 1087 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1088 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1089 1090 /* update rhs: bb1 = bb - B*x */ 1091 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1092 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1093 1094 /* local sweep */ 1095 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1096 CHKERRQ(ierr); 1097 } 1098 } else { 1099 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1100 } 1101 1102 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1103 PetscFunctionReturn(0); 1104 } 1105 1106 #undef __FUNCT__ 1107 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1108 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1109 { 1110 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1111 Mat A = mat->A,B = mat->B; 1112 PetscErrorCode ierr; 1113 PetscReal isend[5],irecv[5]; 1114 1115 PetscFunctionBegin; 1116 info->block_size = 1.0; 1117 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1118 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1119 isend[3] = info->memory; isend[4] = info->mallocs; 1120 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1121 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1122 isend[3] += info->memory; isend[4] += info->mallocs; 1123 if (flag == MAT_LOCAL) { 1124 info->nz_used = isend[0]; 1125 info->nz_allocated = isend[1]; 1126 info->nz_unneeded = isend[2]; 1127 info->memory = isend[3]; 1128 info->mallocs = isend[4]; 1129 } else if (flag == MAT_GLOBAL_MAX) { 1130 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1131 info->nz_used = irecv[0]; 1132 info->nz_allocated = irecv[1]; 1133 info->nz_unneeded = irecv[2]; 1134 info->memory = irecv[3]; 1135 info->mallocs = irecv[4]; 1136 } else if (flag == MAT_GLOBAL_SUM) { 1137 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1138 info->nz_used = irecv[0]; 1139 info->nz_allocated = irecv[1]; 1140 info->nz_unneeded = irecv[2]; 1141 info->memory = irecv[3]; 1142 info->mallocs = irecv[4]; 1143 } 1144 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1145 info->fill_ratio_needed = 0; 1146 info->factor_mallocs = 0; 1147 info->rows_global = (double)matin->M; 1148 info->columns_global = (double)matin->N; 1149 info->rows_local = (double)matin->m; 1150 info->columns_local = (double)matin->N; 1151 1152 PetscFunctionReturn(0); 1153 } 1154 1155 #undef __FUNCT__ 1156 #define __FUNCT__ "MatSetOption_MPIAIJ" 1157 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op) 1158 { 1159 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1160 PetscErrorCode ierr; 1161 1162 PetscFunctionBegin; 1163 switch (op) { 1164 case MAT_NO_NEW_NONZERO_LOCATIONS: 1165 case MAT_YES_NEW_NONZERO_LOCATIONS: 1166 case MAT_COLUMNS_UNSORTED: 1167 case MAT_COLUMNS_SORTED: 1168 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1169 case MAT_KEEP_ZEROED_ROWS: 1170 case MAT_NEW_NONZERO_LOCATION_ERR: 1171 case MAT_USE_INODES: 1172 case MAT_DO_NOT_USE_INODES: 1173 case MAT_IGNORE_ZERO_ENTRIES: 1174 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1175 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1176 break; 1177 case MAT_ROW_ORIENTED: 1178 a->roworiented = PETSC_TRUE; 1179 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1180 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1181 break; 1182 case MAT_ROWS_SORTED: 1183 case MAT_ROWS_UNSORTED: 1184 case MAT_YES_NEW_DIAGONALS: 1185 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1186 break; 1187 case MAT_COLUMN_ORIENTED: 1188 a->roworiented = PETSC_FALSE; 1189 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1190 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1191 break; 1192 case MAT_IGNORE_OFF_PROC_ENTRIES: 1193 a->donotstash = PETSC_TRUE; 1194 break; 1195 case MAT_NO_NEW_DIAGONALS: 1196 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1197 case MAT_SYMMETRIC: 1198 case MAT_STRUCTURALLY_SYMMETRIC: 1199 case MAT_HERMITIAN: 1200 case MAT_SYMMETRY_ETERNAL: 1201 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1202 break; 1203 case MAT_NOT_SYMMETRIC: 1204 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1205 case MAT_NOT_HERMITIAN: 1206 case MAT_NOT_SYMMETRY_ETERNAL: 1207 break; 1208 default: 1209 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1210 } 1211 PetscFunctionReturn(0); 1212 } 1213 1214 #undef __FUNCT__ 1215 #define __FUNCT__ "MatGetRow_MPIAIJ" 1216 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1217 { 1218 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1219 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1220 PetscErrorCode ierr; 1221 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1222 PetscInt nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1223 PetscInt *cmap,*idx_p; 1224 1225 PetscFunctionBegin; 1226 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1227 mat->getrowactive = PETSC_TRUE; 1228 1229 if (!mat->rowvalues && (idx || v)) { 1230 /* 1231 allocate enough space to hold information from the longest row. 1232 */ 1233 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1234 PetscInt max = 1,tmp; 1235 for (i=0; i<matin->m; i++) { 1236 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1237 if (max < tmp) { max = tmp; } 1238 } 1239 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1240 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1241 } 1242 1243 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1244 lrow = row - rstart; 1245 1246 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1247 if (!v) {pvA = 0; pvB = 0;} 1248 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1249 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1250 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1251 nztot = nzA + nzB; 1252 1253 cmap = mat->garray; 1254 if (v || idx) { 1255 if (nztot) { 1256 /* Sort by increasing column numbers, assuming A and B already sorted */ 1257 PetscInt imark = -1; 1258 if (v) { 1259 *v = v_p = mat->rowvalues; 1260 for (i=0; i<nzB; i++) { 1261 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1262 else break; 1263 } 1264 imark = i; 1265 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1266 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1267 } 1268 if (idx) { 1269 *idx = idx_p = mat->rowindices; 1270 if (imark > -1) { 1271 for (i=0; i<imark; i++) { 1272 idx_p[i] = cmap[cworkB[i]]; 1273 } 1274 } else { 1275 for (i=0; i<nzB; i++) { 1276 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1277 else break; 1278 } 1279 imark = i; 1280 } 1281 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1282 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1283 } 1284 } else { 1285 if (idx) *idx = 0; 1286 if (v) *v = 0; 1287 } 1288 } 1289 *nz = nztot; 1290 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1291 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1292 PetscFunctionReturn(0); 1293 } 1294 1295 #undef __FUNCT__ 1296 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1297 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1298 { 1299 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1300 1301 PetscFunctionBegin; 1302 if (aij->getrowactive == PETSC_FALSE) { 1303 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1304 } 1305 aij->getrowactive = PETSC_FALSE; 1306 PetscFunctionReturn(0); 1307 } 1308 1309 #undef __FUNCT__ 1310 #define __FUNCT__ "MatNorm_MPIAIJ" 1311 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1312 { 1313 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1314 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1315 PetscErrorCode ierr; 1316 PetscInt i,j,cstart = aij->cstart; 1317 PetscReal sum = 0.0; 1318 PetscScalar *v; 1319 1320 PetscFunctionBegin; 1321 if (aij->size == 1) { 1322 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1323 } else { 1324 if (type == NORM_FROBENIUS) { 1325 v = amat->a; 1326 for (i=0; i<amat->nz; i++) { 1327 #if defined(PETSC_USE_COMPLEX) 1328 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1329 #else 1330 sum += (*v)*(*v); v++; 1331 #endif 1332 } 1333 v = bmat->a; 1334 for (i=0; i<bmat->nz; i++) { 1335 #if defined(PETSC_USE_COMPLEX) 1336 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1337 #else 1338 sum += (*v)*(*v); v++; 1339 #endif 1340 } 1341 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1342 *norm = sqrt(*norm); 1343 } else if (type == NORM_1) { /* max column norm */ 1344 PetscReal *tmp,*tmp2; 1345 PetscInt *jj,*garray = aij->garray; 1346 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1347 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1348 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1349 *norm = 0.0; 1350 v = amat->a; jj = amat->j; 1351 for (j=0; j<amat->nz; j++) { 1352 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1353 } 1354 v = bmat->a; jj = bmat->j; 1355 for (j=0; j<bmat->nz; j++) { 1356 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1357 } 1358 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1359 for (j=0; j<mat->N; j++) { 1360 if (tmp2[j] > *norm) *norm = tmp2[j]; 1361 } 1362 ierr = PetscFree(tmp);CHKERRQ(ierr); 1363 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1364 } else if (type == NORM_INFINITY) { /* max row norm */ 1365 PetscReal ntemp = 0.0; 1366 for (j=0; j<aij->A->m; j++) { 1367 v = amat->a + amat->i[j]; 1368 sum = 0.0; 1369 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1370 sum += PetscAbsScalar(*v); v++; 1371 } 1372 v = bmat->a + bmat->i[j]; 1373 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1374 sum += PetscAbsScalar(*v); v++; 1375 } 1376 if (sum > ntemp) ntemp = sum; 1377 } 1378 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1379 } else { 1380 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1381 } 1382 } 1383 PetscFunctionReturn(0); 1384 } 1385 1386 #undef __FUNCT__ 1387 #define __FUNCT__ "MatTranspose_MPIAIJ" 1388 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1389 { 1390 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1391 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1392 PetscErrorCode ierr; 1393 PetscInt M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1394 Mat B; 1395 PetscScalar *array; 1396 1397 PetscFunctionBegin; 1398 if (!matout && M != N) { 1399 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1400 } 1401 1402 ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr); 1403 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1404 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1405 1406 /* copy over the A part */ 1407 Aloc = (Mat_SeqAIJ*)a->A->data; 1408 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1409 row = a->rstart; 1410 for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;} 1411 for (i=0; i<m; i++) { 1412 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1413 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1414 } 1415 aj = Aloc->j; 1416 for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;} 1417 1418 /* copy over the B part */ 1419 Aloc = (Mat_SeqAIJ*)a->B->data; 1420 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1421 row = a->rstart; 1422 ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1423 ct = cols; 1424 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1425 for (i=0; i<m; i++) { 1426 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1427 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1428 } 1429 ierr = PetscFree(ct);CHKERRQ(ierr); 1430 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1431 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1432 if (matout) { 1433 *matout = B; 1434 } else { 1435 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1436 } 1437 PetscFunctionReturn(0); 1438 } 1439 1440 #undef __FUNCT__ 1441 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1442 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1443 { 1444 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1445 Mat a = aij->A,b = aij->B; 1446 PetscErrorCode ierr; 1447 PetscInt s1,s2,s3; 1448 1449 PetscFunctionBegin; 1450 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1451 if (rr) { 1452 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1453 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1454 /* Overlap communication with computation. */ 1455 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1456 } 1457 if (ll) { 1458 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1459 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1460 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1461 } 1462 /* scale the diagonal block */ 1463 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1464 1465 if (rr) { 1466 /* Do a scatter end and then right scale the off-diagonal block */ 1467 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1468 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1469 } 1470 1471 PetscFunctionReturn(0); 1472 } 1473 1474 1475 #undef __FUNCT__ 1476 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1477 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A) 1478 { 1479 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1480 PetscErrorCode ierr; 1481 1482 PetscFunctionBegin; 1483 if (!a->rank) { 1484 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1485 } 1486 PetscFunctionReturn(0); 1487 } 1488 1489 #undef __FUNCT__ 1490 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1491 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1492 { 1493 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1494 PetscErrorCode ierr; 1495 1496 PetscFunctionBegin; 1497 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1498 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1499 PetscFunctionReturn(0); 1500 } 1501 #undef __FUNCT__ 1502 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1503 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1504 { 1505 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1506 PetscErrorCode ierr; 1507 1508 PetscFunctionBegin; 1509 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1510 PetscFunctionReturn(0); 1511 } 1512 1513 #undef __FUNCT__ 1514 #define __FUNCT__ "MatEqual_MPIAIJ" 1515 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1516 { 1517 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1518 Mat a,b,c,d; 1519 PetscTruth flg; 1520 PetscErrorCode ierr; 1521 1522 PetscFunctionBegin; 1523 a = matA->A; b = matA->B; 1524 c = matB->A; d = matB->B; 1525 1526 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1527 if (flg == PETSC_TRUE) { 1528 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1529 } 1530 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1531 PetscFunctionReturn(0); 1532 } 1533 1534 #undef __FUNCT__ 1535 #define __FUNCT__ "MatCopy_MPIAIJ" 1536 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1537 { 1538 PetscErrorCode ierr; 1539 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1540 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1541 1542 PetscFunctionBegin; 1543 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1544 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1545 /* because of the column compression in the off-processor part of the matrix a->B, 1546 the number of columns in a->B and b->B may be different, hence we cannot call 1547 the MatCopy() directly on the two parts. If need be, we can provide a more 1548 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1549 then copying the submatrices */ 1550 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1551 } else { 1552 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1553 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1554 } 1555 PetscFunctionReturn(0); 1556 } 1557 1558 #undef __FUNCT__ 1559 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1560 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1561 { 1562 PetscErrorCode ierr; 1563 1564 PetscFunctionBegin; 1565 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1566 PetscFunctionReturn(0); 1567 } 1568 1569 #include "petscblaslapack.h" 1570 #undef __FUNCT__ 1571 #define __FUNCT__ "MatAXPY_MPIAIJ" 1572 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str) 1573 { 1574 PetscErrorCode ierr; 1575 PetscInt i; 1576 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1577 PetscBLASInt bnz,one=1; 1578 Mat_SeqAIJ *x,*y; 1579 1580 PetscFunctionBegin; 1581 if (str == SAME_NONZERO_PATTERN) { 1582 x = (Mat_SeqAIJ *)xx->A->data; 1583 y = (Mat_SeqAIJ *)yy->A->data; 1584 bnz = (PetscBLASInt)x->nz; 1585 BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1586 x = (Mat_SeqAIJ *)xx->B->data; 1587 y = (Mat_SeqAIJ *)yy->B->data; 1588 bnz = (PetscBLASInt)x->nz; 1589 BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1590 } else if (str == SUBSET_NONZERO_PATTERN) { 1591 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1592 1593 x = (Mat_SeqAIJ *)xx->B->data; 1594 y = (Mat_SeqAIJ *)yy->B->data; 1595 if (y->xtoy && y->XtoY != xx->B) { 1596 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1597 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1598 } 1599 if (!y->xtoy) { /* get xtoy */ 1600 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1601 y->XtoY = xx->B; 1602 } 1603 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1604 } else { 1605 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1606 } 1607 PetscFunctionReturn(0); 1608 } 1609 1610 /* -------------------------------------------------------------------*/ 1611 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1612 MatGetRow_MPIAIJ, 1613 MatRestoreRow_MPIAIJ, 1614 MatMult_MPIAIJ, 1615 /* 4*/ MatMultAdd_MPIAIJ, 1616 MatMultTranspose_MPIAIJ, 1617 MatMultTransposeAdd_MPIAIJ, 1618 0, 1619 0, 1620 0, 1621 /*10*/ 0, 1622 0, 1623 0, 1624 MatRelax_MPIAIJ, 1625 MatTranspose_MPIAIJ, 1626 /*15*/ MatGetInfo_MPIAIJ, 1627 MatEqual_MPIAIJ, 1628 MatGetDiagonal_MPIAIJ, 1629 MatDiagonalScale_MPIAIJ, 1630 MatNorm_MPIAIJ, 1631 /*20*/ MatAssemblyBegin_MPIAIJ, 1632 MatAssemblyEnd_MPIAIJ, 1633 0, 1634 MatSetOption_MPIAIJ, 1635 MatZeroEntries_MPIAIJ, 1636 /*25*/ MatZeroRows_MPIAIJ, 1637 0, 1638 0, 1639 0, 1640 0, 1641 /*30*/ MatSetUpPreallocation_MPIAIJ, 1642 0, 1643 0, 1644 0, 1645 0, 1646 /*35*/ MatDuplicate_MPIAIJ, 1647 0, 1648 0, 1649 0, 1650 0, 1651 /*40*/ MatAXPY_MPIAIJ, 1652 MatGetSubMatrices_MPIAIJ, 1653 MatIncreaseOverlap_MPIAIJ, 1654 MatGetValues_MPIAIJ, 1655 MatCopy_MPIAIJ, 1656 /*45*/ MatPrintHelp_MPIAIJ, 1657 MatScale_MPIAIJ, 1658 0, 1659 0, 1660 0, 1661 /*50*/ MatSetBlockSize_MPIAIJ, 1662 0, 1663 0, 1664 0, 1665 0, 1666 /*55*/ MatFDColoringCreate_MPIAIJ, 1667 0, 1668 MatSetUnfactored_MPIAIJ, 1669 0, 1670 0, 1671 /*60*/ MatGetSubMatrix_MPIAIJ, 1672 MatDestroy_MPIAIJ, 1673 MatView_MPIAIJ, 1674 MatGetPetscMaps_Petsc, 1675 0, 1676 /*65*/ 0, 1677 0, 1678 0, 1679 0, 1680 0, 1681 /*70*/ 0, 1682 0, 1683 MatSetColoring_MPIAIJ, 1684 MatSetValuesAdic_MPIAIJ, 1685 MatSetValuesAdifor_MPIAIJ, 1686 /*75*/ 0, 1687 0, 1688 0, 1689 0, 1690 0, 1691 /*80*/ 0, 1692 0, 1693 0, 1694 0, 1695 /*84*/ MatLoad_MPIAIJ, 1696 0, 1697 0, 1698 0, 1699 0, 1700 0, 1701 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1702 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1703 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1704 MatPtAP_MPIAIJ_MPIAIJ, 1705 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1706 /*95*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 1707 0, 1708 0, 1709 0}; 1710 1711 /* ----------------------------------------------------------------------------------------*/ 1712 1713 EXTERN_C_BEGIN 1714 #undef __FUNCT__ 1715 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1716 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 1717 { 1718 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1719 PetscErrorCode ierr; 1720 1721 PetscFunctionBegin; 1722 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1723 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1724 PetscFunctionReturn(0); 1725 } 1726 EXTERN_C_END 1727 1728 EXTERN_C_BEGIN 1729 #undef __FUNCT__ 1730 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1731 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 1732 { 1733 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1734 PetscErrorCode ierr; 1735 1736 PetscFunctionBegin; 1737 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1738 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1739 PetscFunctionReturn(0); 1740 } 1741 EXTERN_C_END 1742 1743 #include "petscpc.h" 1744 EXTERN_C_BEGIN 1745 #undef __FUNCT__ 1746 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1747 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1748 { 1749 Mat_MPIAIJ *b; 1750 PetscErrorCode ierr; 1751 PetscInt i; 1752 1753 PetscFunctionBegin; 1754 B->preallocated = PETSC_TRUE; 1755 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1756 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1757 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1758 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1759 if (d_nnz) { 1760 for (i=0; i<B->m; i++) { 1761 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1762 } 1763 } 1764 if (o_nnz) { 1765 for (i=0; i<B->m; i++) { 1766 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1767 } 1768 } 1769 b = (Mat_MPIAIJ*)B->data; 1770 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1771 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1772 1773 PetscFunctionReturn(0); 1774 } 1775 EXTERN_C_END 1776 1777 /*MC 1778 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 1779 1780 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 1781 and MATMPIAIJ otherwise. As a result, for single process communicators, 1782 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 1783 for communicators controlling multiple processes. It is recommended that you call both of 1784 the above preallocation routines for simplicity. 1785 1786 Options Database Keys: 1787 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 1788 1789 Level: beginner 1790 1791 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ 1792 M*/ 1793 1794 EXTERN_C_BEGIN 1795 #undef __FUNCT__ 1796 #define __FUNCT__ "MatCreate_AIJ" 1797 PetscErrorCode MatCreate_AIJ(Mat A) 1798 { 1799 PetscErrorCode ierr; 1800 PetscMPIInt size; 1801 1802 PetscFunctionBegin; 1803 ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr); 1804 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1805 if (size == 1) { 1806 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 1807 } else { 1808 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1809 } 1810 PetscFunctionReturn(0); 1811 } 1812 EXTERN_C_END 1813 1814 #undef __FUNCT__ 1815 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1816 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1817 { 1818 Mat mat; 1819 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1820 PetscErrorCode ierr; 1821 1822 PetscFunctionBegin; 1823 *newmat = 0; 1824 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1825 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1826 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1827 a = (Mat_MPIAIJ*)mat->data; 1828 1829 mat->factor = matin->factor; 1830 mat->bs = matin->bs; 1831 mat->assembled = PETSC_TRUE; 1832 mat->insertmode = NOT_SET_VALUES; 1833 mat->preallocated = PETSC_TRUE; 1834 1835 a->rstart = oldmat->rstart; 1836 a->rend = oldmat->rend; 1837 a->cstart = oldmat->cstart; 1838 a->cend = oldmat->cend; 1839 a->size = oldmat->size; 1840 a->rank = oldmat->rank; 1841 a->donotstash = oldmat->donotstash; 1842 a->roworiented = oldmat->roworiented; 1843 a->rowindices = 0; 1844 a->rowvalues = 0; 1845 a->getrowactive = PETSC_FALSE; 1846 1847 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 1848 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1849 if (oldmat->colmap) { 1850 #if defined (PETSC_USE_CTABLE) 1851 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1852 #else 1853 ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1854 PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt)); 1855 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1856 #endif 1857 } else a->colmap = 0; 1858 if (oldmat->garray) { 1859 PetscInt len; 1860 len = oldmat->B->n; 1861 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1862 PetscLogObjectMemory(mat,len*sizeof(PetscInt)); 1863 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 1864 } else a->garray = 0; 1865 1866 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1867 PetscLogObjectParent(mat,a->lvec); 1868 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1869 PetscLogObjectParent(mat,a->Mvctx); 1870 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1871 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1872 PetscLogObjectParent(mat,a->A); 1873 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1874 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1875 PetscLogObjectParent(mat,a->B); 1876 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1877 *newmat = mat; 1878 PetscFunctionReturn(0); 1879 } 1880 1881 #include "petscsys.h" 1882 1883 #undef __FUNCT__ 1884 #define __FUNCT__ "MatLoad_MPIAIJ" 1885 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 1886 { 1887 Mat A; 1888 PetscScalar *vals,*svals; 1889 MPI_Comm comm = ((PetscObject)viewer)->comm; 1890 MPI_Status status; 1891 PetscErrorCode ierr; 1892 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 1893 PetscInt i,nz,j,rstart,rend; 1894 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 1895 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1896 PetscInt cend,cstart,n,*rowners; 1897 int fd; 1898 1899 PetscFunctionBegin; 1900 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1901 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1902 if (!rank) { 1903 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1904 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1905 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1906 } 1907 1908 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1909 M = header[1]; N = header[2]; 1910 /* determine ownership of all rows */ 1911 m = M/size + ((M % size) > rank); 1912 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1913 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1914 rowners[0] = 0; 1915 for (i=2; i<=size; i++) { 1916 rowners[i] += rowners[i-1]; 1917 } 1918 rstart = rowners[rank]; 1919 rend = rowners[rank+1]; 1920 1921 /* distribute row lengths to all processors */ 1922 ierr = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr); 1923 if (!rank) { 1924 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 1925 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1926 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 1927 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1928 for (j=0; j<m; j++) { 1929 procsnz[0] += ourlens[j]; 1930 } 1931 for (i=1; i<size; i++) { 1932 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 1933 /* calculate the number of nonzeros on each processor */ 1934 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 1935 procsnz[i] += rowlengths[j]; 1936 } 1937 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1938 } 1939 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1940 } else { 1941 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1942 } 1943 1944 if (!rank) { 1945 /* determine max buffer needed and allocate it */ 1946 maxnz = 0; 1947 for (i=0; i<size; i++) { 1948 maxnz = PetscMax(maxnz,procsnz[i]); 1949 } 1950 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1951 1952 /* read in my part of the matrix column indices */ 1953 nz = procsnz[0]; 1954 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1955 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1956 1957 /* read in every one elses and ship off */ 1958 for (i=1; i<size; i++) { 1959 nz = procsnz[i]; 1960 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1961 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1962 } 1963 ierr = PetscFree(cols);CHKERRQ(ierr); 1964 } else { 1965 /* determine buffer space needed for message */ 1966 nz = 0; 1967 for (i=0; i<m; i++) { 1968 nz += ourlens[i]; 1969 } 1970 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1971 1972 /* receive message of column indices*/ 1973 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1974 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1975 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1976 } 1977 1978 /* determine column ownership if matrix is not square */ 1979 if (N != M) { 1980 n = N/size + ((N % size) > rank); 1981 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1982 cstart = cend - n; 1983 } else { 1984 cstart = rstart; 1985 cend = rend; 1986 n = cend - cstart; 1987 } 1988 1989 /* loop over local rows, determining number of off diagonal entries */ 1990 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 1991 jj = 0; 1992 for (i=0; i<m; i++) { 1993 for (j=0; j<ourlens[i]; j++) { 1994 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 1995 jj++; 1996 } 1997 } 1998 1999 /* create our matrix */ 2000 for (i=0; i<m; i++) { 2001 ourlens[i] -= offlens[i]; 2002 } 2003 ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr); 2004 ierr = MatSetType(A,type);CHKERRQ(ierr); 2005 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2006 2007 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2008 for (i=0; i<m; i++) { 2009 ourlens[i] += offlens[i]; 2010 } 2011 2012 if (!rank) { 2013 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2014 2015 /* read in my part of the matrix numerical values */ 2016 nz = procsnz[0]; 2017 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2018 2019 /* insert into matrix */ 2020 jj = rstart; 2021 smycols = mycols; 2022 svals = vals; 2023 for (i=0; i<m; i++) { 2024 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2025 smycols += ourlens[i]; 2026 svals += ourlens[i]; 2027 jj++; 2028 } 2029 2030 /* read in other processors and ship out */ 2031 for (i=1; i<size; i++) { 2032 nz = procsnz[i]; 2033 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2034 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2035 } 2036 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2037 } else { 2038 /* receive numeric values */ 2039 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2040 2041 /* receive message of values*/ 2042 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2043 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2044 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2045 2046 /* insert into matrix */ 2047 jj = rstart; 2048 smycols = mycols; 2049 svals = vals; 2050 for (i=0; i<m; i++) { 2051 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2052 smycols += ourlens[i]; 2053 svals += ourlens[i]; 2054 jj++; 2055 } 2056 } 2057 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2058 ierr = PetscFree(vals);CHKERRQ(ierr); 2059 ierr = PetscFree(mycols);CHKERRQ(ierr); 2060 ierr = PetscFree(rowners);CHKERRQ(ierr); 2061 2062 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2063 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2064 *newmat = A; 2065 PetscFunctionReturn(0); 2066 } 2067 2068 #undef __FUNCT__ 2069 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2070 /* 2071 Not great since it makes two copies of the submatrix, first an SeqAIJ 2072 in local and then by concatenating the local matrices the end result. 2073 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2074 */ 2075 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2076 { 2077 PetscErrorCode ierr; 2078 PetscMPIInt rank,size; 2079 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2080 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2081 Mat *local,M,Mreuse; 2082 PetscScalar *vwork,*aa; 2083 MPI_Comm comm = mat->comm; 2084 Mat_SeqAIJ *aij; 2085 2086 2087 PetscFunctionBegin; 2088 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2089 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2090 2091 if (call == MAT_REUSE_MATRIX) { 2092 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2093 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2094 local = &Mreuse; 2095 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2096 } else { 2097 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2098 Mreuse = *local; 2099 ierr = PetscFree(local);CHKERRQ(ierr); 2100 } 2101 2102 /* 2103 m - number of local rows 2104 n - number of columns (same on all processors) 2105 rstart - first row in new global matrix generated 2106 */ 2107 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2108 if (call == MAT_INITIAL_MATRIX) { 2109 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2110 ii = aij->i; 2111 jj = aij->j; 2112 2113 /* 2114 Determine the number of non-zeros in the diagonal and off-diagonal 2115 portions of the matrix in order to do correct preallocation 2116 */ 2117 2118 /* first get start and end of "diagonal" columns */ 2119 if (csize == PETSC_DECIDE) { 2120 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2121 if (mglobal == n) { /* square matrix */ 2122 nlocal = m; 2123 } else { 2124 nlocal = n/size + ((n % size) > rank); 2125 } 2126 } else { 2127 nlocal = csize; 2128 } 2129 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2130 rstart = rend - nlocal; 2131 if (rank == size - 1 && rend != n) { 2132 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2133 } 2134 2135 /* next, compute all the lengths */ 2136 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2137 olens = dlens + m; 2138 for (i=0; i<m; i++) { 2139 jend = ii[i+1] - ii[i]; 2140 olen = 0; 2141 dlen = 0; 2142 for (j=0; j<jend; j++) { 2143 if (*jj < rstart || *jj >= rend) olen++; 2144 else dlen++; 2145 jj++; 2146 } 2147 olens[i] = olen; 2148 dlens[i] = dlen; 2149 } 2150 ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr); 2151 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2152 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2153 ierr = PetscFree(dlens);CHKERRQ(ierr); 2154 } else { 2155 PetscInt ml,nl; 2156 2157 M = *newmat; 2158 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2159 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2160 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2161 /* 2162 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2163 rather than the slower MatSetValues(). 2164 */ 2165 M->was_assembled = PETSC_TRUE; 2166 M->assembled = PETSC_FALSE; 2167 } 2168 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2169 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2170 ii = aij->i; 2171 jj = aij->j; 2172 aa = aij->a; 2173 for (i=0; i<m; i++) { 2174 row = rstart + i; 2175 nz = ii[i+1] - ii[i]; 2176 cwork = jj; jj += nz; 2177 vwork = aa; aa += nz; 2178 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2179 } 2180 2181 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2182 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2183 *newmat = M; 2184 2185 /* save submatrix used in processor for next request */ 2186 if (call == MAT_INITIAL_MATRIX) { 2187 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2188 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2189 } 2190 2191 PetscFunctionReturn(0); 2192 } 2193 2194 EXTERN_C_BEGIN 2195 #undef __FUNCT__ 2196 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2197 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2198 { 2199 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2200 PetscInt m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d; 2201 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii; 2202 const PetscInt *JJ; 2203 PetscScalar *values; 2204 PetscErrorCode ierr; 2205 2206 PetscFunctionBegin; 2207 #if defined(PETSC_OPT_g) 2208 if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]); 2209 #endif 2210 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2211 o_nnz = d_nnz + m; 2212 2213 for (i=0; i<m; i++) { 2214 nnz = I[i+1]- I[i]; 2215 JJ = J + I[i]; 2216 nnz_max = PetscMax(nnz_max,nnz); 2217 #if defined(PETSC_OPT_g) 2218 if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 2219 #endif 2220 for (j=0; j<nnz; j++) { 2221 if (*JJ >= cstart) break; 2222 JJ++; 2223 } 2224 d = 0; 2225 for (; j<nnz; j++) { 2226 if (*JJ++ >= cend) break; 2227 d++; 2228 } 2229 d_nnz[i] = d; 2230 o_nnz[i] = nnz - d; 2231 } 2232 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2233 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2234 2235 if (v) values = (PetscScalar*)v; 2236 else { 2237 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2238 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2239 } 2240 2241 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2242 for (i=0; i<m; i++) { 2243 ii = i + rstart; 2244 nnz = I[i+1]- I[i]; 2245 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr); 2246 } 2247 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2248 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2249 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2250 2251 if (!v) { 2252 ierr = PetscFree(values);CHKERRQ(ierr); 2253 } 2254 PetscFunctionReturn(0); 2255 } 2256 EXTERN_C_END 2257 2258 #undef __FUNCT__ 2259 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2260 /*@C 2261 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2262 (the default parallel PETSc format). 2263 2264 Collective on MPI_Comm 2265 2266 Input Parameters: 2267 + A - the matrix 2268 . i - the indices into j for the start of each local row (starts with zero) 2269 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2270 - v - optional values in the matrix 2271 2272 Level: developer 2273 2274 .keywords: matrix, aij, compressed row, sparse, parallel 2275 2276 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2277 @*/ 2278 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2279 { 2280 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2281 2282 PetscFunctionBegin; 2283 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2284 if (f) { 2285 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2286 } 2287 PetscFunctionReturn(0); 2288 } 2289 2290 #undef __FUNCT__ 2291 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2292 /*@C 2293 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2294 (the default parallel PETSc format). For good matrix assembly performance 2295 the user should preallocate the matrix storage by setting the parameters 2296 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2297 performance can be increased by more than a factor of 50. 2298 2299 Collective on MPI_Comm 2300 2301 Input Parameters: 2302 + A - the matrix 2303 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2304 (same value is used for all local rows) 2305 . d_nnz - array containing the number of nonzeros in the various rows of the 2306 DIAGONAL portion of the local submatrix (possibly different for each row) 2307 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2308 The size of this array is equal to the number of local rows, i.e 'm'. 2309 You must leave room for the diagonal entry even if it is zero. 2310 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2311 submatrix (same value is used for all local rows). 2312 - o_nnz - array containing the number of nonzeros in the various rows of the 2313 OFF-DIAGONAL portion of the local submatrix (possibly different for 2314 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2315 structure. The size of this array is equal to the number 2316 of local rows, i.e 'm'. 2317 2318 If the *_nnz parameter is given then the *_nz parameter is ignored 2319 2320 The AIJ format (also called the Yale sparse matrix format or 2321 compressed row storage (CSR)), is fully compatible with standard Fortran 77 2322 storage. The stored row and column indices begin with zero. See the users manual for details. 2323 2324 The parallel matrix is partitioned such that the first m0 rows belong to 2325 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2326 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2327 2328 The DIAGONAL portion of the local submatrix of a processor can be defined 2329 as the submatrix which is obtained by extraction the part corresponding 2330 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2331 first row that belongs to the processor, and r2 is the last row belonging 2332 to the this processor. This is a square mxm matrix. The remaining portion 2333 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2334 2335 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2336 2337 Example usage: 2338 2339 Consider the following 8x8 matrix with 34 non-zero values, that is 2340 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2341 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2342 as follows: 2343 2344 .vb 2345 1 2 0 | 0 3 0 | 0 4 2346 Proc0 0 5 6 | 7 0 0 | 8 0 2347 9 0 10 | 11 0 0 | 12 0 2348 ------------------------------------- 2349 13 0 14 | 15 16 17 | 0 0 2350 Proc1 0 18 0 | 19 20 21 | 0 0 2351 0 0 0 | 22 23 0 | 24 0 2352 ------------------------------------- 2353 Proc2 25 26 27 | 0 0 28 | 29 0 2354 30 0 0 | 31 32 33 | 0 34 2355 .ve 2356 2357 This can be represented as a collection of submatrices as: 2358 2359 .vb 2360 A B C 2361 D E F 2362 G H I 2363 .ve 2364 2365 Where the submatrices A,B,C are owned by proc0, D,E,F are 2366 owned by proc1, G,H,I are owned by proc2. 2367 2368 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2369 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2370 The 'M','N' parameters are 8,8, and have the same values on all procs. 2371 2372 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2373 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2374 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2375 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2376 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2377 matrix, ans [DF] as another SeqAIJ matrix. 2378 2379 When d_nz, o_nz parameters are specified, d_nz storage elements are 2380 allocated for every row of the local diagonal submatrix, and o_nz 2381 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2382 One way to choose d_nz and o_nz is to use the max nonzerors per local 2383 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2384 In this case, the values of d_nz,o_nz are: 2385 .vb 2386 proc0 : dnz = 2, o_nz = 2 2387 proc1 : dnz = 3, o_nz = 2 2388 proc2 : dnz = 1, o_nz = 4 2389 .ve 2390 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2391 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2392 for proc3. i.e we are using 12+15+10=37 storage locations to store 2393 34 values. 2394 2395 When d_nnz, o_nnz parameters are specified, the storage is specified 2396 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2397 In the above case the values for d_nnz,o_nnz are: 2398 .vb 2399 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2400 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2401 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2402 .ve 2403 Here the space allocated is sum of all the above values i.e 34, and 2404 hence pre-allocation is perfect. 2405 2406 Level: intermediate 2407 2408 .keywords: matrix, aij, compressed row, sparse, parallel 2409 2410 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 2411 MPIAIJ 2412 @*/ 2413 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2414 { 2415 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2416 2417 PetscFunctionBegin; 2418 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2419 if (f) { 2420 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2421 } 2422 PetscFunctionReturn(0); 2423 } 2424 2425 #undef __FUNCT__ 2426 #define __FUNCT__ "MatCreateMPIAIJ" 2427 /*@C 2428 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2429 (the default parallel PETSc format). For good matrix assembly performance 2430 the user should preallocate the matrix storage by setting the parameters 2431 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2432 performance can be increased by more than a factor of 50. 2433 2434 Collective on MPI_Comm 2435 2436 Input Parameters: 2437 + comm - MPI communicator 2438 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2439 This value should be the same as the local size used in creating the 2440 y vector for the matrix-vector product y = Ax. 2441 . n - This value should be the same as the local size used in creating the 2442 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2443 calculated if N is given) For square matrices n is almost always m. 2444 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2445 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2446 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2447 (same value is used for all local rows) 2448 . d_nnz - array containing the number of nonzeros in the various rows of the 2449 DIAGONAL portion of the local submatrix (possibly different for each row) 2450 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2451 The size of this array is equal to the number of local rows, i.e 'm'. 2452 You must leave room for the diagonal entry even if it is zero. 2453 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2454 submatrix (same value is used for all local rows). 2455 - o_nnz - array containing the number of nonzeros in the various rows of the 2456 OFF-DIAGONAL portion of the local submatrix (possibly different for 2457 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2458 structure. The size of this array is equal to the number 2459 of local rows, i.e 'm'. 2460 2461 Output Parameter: 2462 . A - the matrix 2463 2464 Notes: 2465 If the *_nnz parameter is given then the *_nz parameter is ignored 2466 2467 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2468 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2469 storage requirements for this matrix. 2470 2471 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2472 processor than it must be used on all processors that share the object for 2473 that argument. 2474 2475 The user MUST specify either the local or global matrix dimensions 2476 (possibly both). 2477 2478 The parallel matrix is partitioned such that the first m0 rows belong to 2479 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2480 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2481 2482 The DIAGONAL portion of the local submatrix of a processor can be defined 2483 as the submatrix which is obtained by extraction the part corresponding 2484 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2485 first row that belongs to the processor, and r2 is the last row belonging 2486 to the this processor. This is a square mxm matrix. The remaining portion 2487 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2488 2489 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2490 2491 When calling this routine with a single process communicator, a matrix of 2492 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2493 type of communicator, use the construction mechanism: 2494 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 2495 2496 By default, this format uses inodes (identical nodes) when possible. 2497 We search for consecutive rows with the same nonzero structure, thereby 2498 reusing matrix information to achieve increased efficiency. 2499 2500 Options Database Keys: 2501 + -mat_aij_no_inode - Do not use inodes 2502 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2503 - -mat_aij_oneindex - Internally use indexing starting at 1 2504 rather than 0. Note that when calling MatSetValues(), 2505 the user still MUST index entries starting at 0! 2506 2507 2508 Example usage: 2509 2510 Consider the following 8x8 matrix with 34 non-zero values, that is 2511 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2512 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2513 as follows: 2514 2515 .vb 2516 1 2 0 | 0 3 0 | 0 4 2517 Proc0 0 5 6 | 7 0 0 | 8 0 2518 9 0 10 | 11 0 0 | 12 0 2519 ------------------------------------- 2520 13 0 14 | 15 16 17 | 0 0 2521 Proc1 0 18 0 | 19 20 21 | 0 0 2522 0 0 0 | 22 23 0 | 24 0 2523 ------------------------------------- 2524 Proc2 25 26 27 | 0 0 28 | 29 0 2525 30 0 0 | 31 32 33 | 0 34 2526 .ve 2527 2528 This can be represented as a collection of submatrices as: 2529 2530 .vb 2531 A B C 2532 D E F 2533 G H I 2534 .ve 2535 2536 Where the submatrices A,B,C are owned by proc0, D,E,F are 2537 owned by proc1, G,H,I are owned by proc2. 2538 2539 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2540 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2541 The 'M','N' parameters are 8,8, and have the same values on all procs. 2542 2543 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2544 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2545 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2546 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2547 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2548 matrix, ans [DF] as another SeqAIJ matrix. 2549 2550 When d_nz, o_nz parameters are specified, d_nz storage elements are 2551 allocated for every row of the local diagonal submatrix, and o_nz 2552 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2553 One way to choose d_nz and o_nz is to use the max nonzerors per local 2554 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2555 In this case, the values of d_nz,o_nz are: 2556 .vb 2557 proc0 : dnz = 2, o_nz = 2 2558 proc1 : dnz = 3, o_nz = 2 2559 proc2 : dnz = 1, o_nz = 4 2560 .ve 2561 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2562 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2563 for proc3. i.e we are using 12+15+10=37 storage locations to store 2564 34 values. 2565 2566 When d_nnz, o_nnz parameters are specified, the storage is specified 2567 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2568 In the above case the values for d_nnz,o_nnz are: 2569 .vb 2570 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2571 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2572 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2573 .ve 2574 Here the space allocated is sum of all the above values i.e 34, and 2575 hence pre-allocation is perfect. 2576 2577 Level: intermediate 2578 2579 .keywords: matrix, aij, compressed row, sparse, parallel 2580 2581 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2582 MPIAIJ 2583 @*/ 2584 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2585 { 2586 PetscErrorCode ierr; 2587 PetscMPIInt size; 2588 2589 PetscFunctionBegin; 2590 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2591 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2592 if (size > 1) { 2593 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2594 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2595 } else { 2596 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2597 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2598 } 2599 PetscFunctionReturn(0); 2600 } 2601 2602 #undef __FUNCT__ 2603 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2604 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2605 { 2606 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2607 2608 PetscFunctionBegin; 2609 *Ad = a->A; 2610 *Ao = a->B; 2611 *colmap = a->garray; 2612 PetscFunctionReturn(0); 2613 } 2614 2615 #undef __FUNCT__ 2616 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2617 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2618 { 2619 PetscErrorCode ierr; 2620 PetscInt i; 2621 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2622 2623 PetscFunctionBegin; 2624 if (coloring->ctype == IS_COLORING_LOCAL) { 2625 ISColoringValue *allcolors,*colors; 2626 ISColoring ocoloring; 2627 2628 /* set coloring for diagonal portion */ 2629 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2630 2631 /* set coloring for off-diagonal portion */ 2632 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2633 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2634 for (i=0; i<a->B->n; i++) { 2635 colors[i] = allcolors[a->garray[i]]; 2636 } 2637 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2638 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2639 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2640 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2641 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2642 ISColoringValue *colors; 2643 PetscInt *larray; 2644 ISColoring ocoloring; 2645 2646 /* set coloring for diagonal portion */ 2647 ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2648 for (i=0; i<a->A->n; i++) { 2649 larray[i] = i + a->cstart; 2650 } 2651 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2652 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2653 for (i=0; i<a->A->n; i++) { 2654 colors[i] = coloring->colors[larray[i]]; 2655 } 2656 ierr = PetscFree(larray);CHKERRQ(ierr); 2657 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2658 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2659 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2660 2661 /* set coloring for off-diagonal portion */ 2662 ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2663 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2664 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2665 for (i=0; i<a->B->n; i++) { 2666 colors[i] = coloring->colors[larray[i]]; 2667 } 2668 ierr = PetscFree(larray);CHKERRQ(ierr); 2669 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2670 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2671 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2672 } else { 2673 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 2674 } 2675 2676 PetscFunctionReturn(0); 2677 } 2678 2679 #undef __FUNCT__ 2680 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2681 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2682 { 2683 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2684 PetscErrorCode ierr; 2685 2686 PetscFunctionBegin; 2687 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2688 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2689 PetscFunctionReturn(0); 2690 } 2691 2692 #undef __FUNCT__ 2693 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2694 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 2695 { 2696 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2697 PetscErrorCode ierr; 2698 2699 PetscFunctionBegin; 2700 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2701 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2702 PetscFunctionReturn(0); 2703 } 2704 2705 #undef __FUNCT__ 2706 #define __FUNCT__ "MatMerge" 2707 /*@C 2708 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2709 matrices from each processor 2710 2711 Collective on MPI_Comm 2712 2713 Input Parameters: 2714 + comm - the communicators the parallel matrix will live on 2715 . inmat - the input sequential matrices 2716 . n - number of local columns (or PETSC_DECIDE) 2717 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2718 2719 Output Parameter: 2720 . outmat - the parallel matrix generated 2721 2722 Level: advanced 2723 2724 Notes: The number of columns of the matrix in EACH processor MUST be the same. 2725 2726 @*/ 2727 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2728 { 2729 PetscErrorCode ierr; 2730 PetscInt m,N,i,rstart,nnz,I,*dnz,*onz; 2731 PetscInt *indx; 2732 PetscScalar *values; 2733 PetscMap columnmap,rowmap; 2734 2735 PetscFunctionBegin; 2736 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2737 /* 2738 PetscMPIInt rank; 2739 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2740 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N); 2741 */ 2742 if (scall == MAT_INITIAL_MATRIX){ 2743 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2744 if (n == PETSC_DECIDE){ 2745 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2746 ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr); 2747 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2748 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2749 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2750 } 2751 2752 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2753 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2754 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2755 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2756 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2757 2758 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2759 for (i=0;i<m;i++) { 2760 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2761 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2762 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2763 } 2764 /* This routine will ONLY return MPIAIJ type matrix */ 2765 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr); 2766 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2767 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2768 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2769 2770 } else if (scall == MAT_REUSE_MATRIX){ 2771 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2772 } else { 2773 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 2774 } 2775 2776 for (i=0;i<m;i++) { 2777 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2778 I = i + rstart; 2779 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2780 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2781 } 2782 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2783 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2784 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2785 2786 PetscFunctionReturn(0); 2787 } 2788 2789 #undef __FUNCT__ 2790 #define __FUNCT__ "MatFileSplit" 2791 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2792 { 2793 PetscErrorCode ierr; 2794 PetscMPIInt rank; 2795 PetscInt m,N,i,rstart,nnz; 2796 size_t len; 2797 const PetscInt *indx; 2798 PetscViewer out; 2799 char *name; 2800 Mat B; 2801 const PetscScalar *values; 2802 2803 PetscFunctionBegin; 2804 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2805 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2806 /* Should this be the type of the diagonal block of A? */ 2807 ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr); 2808 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2809 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2810 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2811 for (i=0;i<m;i++) { 2812 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2813 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2814 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2815 } 2816 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2817 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2818 2819 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2820 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2821 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2822 sprintf(name,"%s.%d",outfile,rank); 2823 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr); 2824 ierr = PetscFree(name); 2825 ierr = MatView(B,out);CHKERRQ(ierr); 2826 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2827 ierr = MatDestroy(B);CHKERRQ(ierr); 2828 PetscFunctionReturn(0); 2829 } 2830 2831 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 2832 #undef __FUNCT__ 2833 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 2834 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 2835 { 2836 PetscErrorCode ierr; 2837 Mat_Merge_SeqsToMPI *merge; 2838 PetscObjectContainer container; 2839 2840 PetscFunctionBegin; 2841 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2842 if (container) { 2843 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2844 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 2845 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 2846 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 2847 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 2848 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 2849 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 2850 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 2851 ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr); 2852 if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);} 2853 if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);} 2854 if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);} 2855 2856 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 2857 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 2858 } 2859 ierr = PetscFree(merge);CHKERRQ(ierr); 2860 2861 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 2862 PetscFunctionReturn(0); 2863 } 2864 2865 #include "src/mat/utils/freespace.h" 2866 #include "petscbt.h" 2867 #undef __FUNCT__ 2868 #define __FUNCT__ "MatMerge_SeqsToMPI" 2869 /*@C 2870 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 2871 matrices from each processor 2872 2873 Collective on MPI_Comm 2874 2875 Input Parameters: 2876 + comm - the communicators the parallel matrix will live on 2877 . seqmat - the input sequential matrices 2878 . m - number of local rows (or PETSC_DECIDE) 2879 . n - number of local columns (or PETSC_DECIDE) 2880 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2881 2882 Output Parameter: 2883 . mpimat - the parallel matrix generated 2884 2885 Level: advanced 2886 2887 Notes: 2888 The dimensions of the sequential matrix in each processor MUST be the same. 2889 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 2890 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 2891 @*/ 2892 static PetscEvent logkey_seqstompinum = 0; 2893 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 2894 { 2895 PetscErrorCode ierr; 2896 MPI_Comm comm=mpimat->comm; 2897 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2898 PetscMPIInt size,rank,taga,*len_s; 2899 PetscInt N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j; 2900 PetscInt proc,m; 2901 PetscInt **buf_ri,**buf_rj; 2902 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 2903 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 2904 MPI_Request *s_waits,*r_waits; 2905 MPI_Status *status; 2906 MatScalar *aa=a->a,**abuf_r,*ba_i; 2907 Mat_Merge_SeqsToMPI *merge; 2908 PetscObjectContainer container; 2909 2910 PetscFunctionBegin; 2911 if (!logkey_seqstompinum) { 2912 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 2913 } 2914 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 2915 2916 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2917 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2918 2919 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2920 if (container) { 2921 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2922 } 2923 bi = merge->bi; 2924 bj = merge->bj; 2925 buf_ri = merge->buf_ri; 2926 buf_rj = merge->buf_rj; 2927 2928 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 2929 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 2930 len_s = merge->len_s; 2931 2932 /* send and recv matrix values */ 2933 /*-----------------------------*/ 2934 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 2935 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 2936 2937 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 2938 for (proc=0,k=0; proc<size; proc++){ 2939 if (!len_s[proc]) continue; 2940 i = owners[proc]; 2941 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 2942 k++; 2943 } 2944 2945 ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr); 2946 ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr); 2947 ierr = PetscFree(status);CHKERRQ(ierr); 2948 2949 ierr = PetscFree(s_waits);CHKERRQ(ierr); 2950 ierr = PetscFree(r_waits);CHKERRQ(ierr); 2951 2952 /* insert mat values of mpimat */ 2953 /*----------------------------*/ 2954 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 2955 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 2956 nextrow = buf_ri_k + merge->nrecv; 2957 nextai = nextrow + merge->nrecv; 2958 2959 for (k=0; k<merge->nrecv; k++){ 2960 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 2961 nrows = *(buf_ri_k[k]); 2962 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 2963 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 2964 } 2965 2966 /* set values of ba */ 2967 ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); 2968 for (i=0; i<m; i++) { 2969 arow = owners[rank] + i; 2970 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 2971 bnzi = bi[i+1] - bi[i]; 2972 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 2973 2974 /* add local non-zero vals of this proc's seqmat into ba */ 2975 anzi = ai[arow+1] - ai[arow]; 2976 aj = a->j + ai[arow]; 2977 aa = a->a + ai[arow]; 2978 nextaj = 0; 2979 for (j=0; nextaj<anzi; j++){ 2980 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 2981 ba_i[j] += aa[nextaj++]; 2982 } 2983 } 2984 2985 /* add received vals into ba */ 2986 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 2987 /* i-th row */ 2988 if (i == *nextrow[k]) { 2989 anzi = *(nextai[k]+1) - *nextai[k]; 2990 aj = buf_rj[k] + *(nextai[k]); 2991 aa = abuf_r[k] + *(nextai[k]); 2992 nextaj = 0; 2993 for (j=0; nextaj<anzi; j++){ 2994 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 2995 ba_i[j] += aa[nextaj++]; 2996 } 2997 } 2998 nextrow[k]++; nextai[k]++; 2999 } 3000 } 3001 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3002 } 3003 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3004 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3005 3006 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3007 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3008 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3009 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3010 PetscFunctionReturn(0); 3011 } 3012 static PetscEvent logkey_seqstompisym = 0; 3013 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3014 { 3015 PetscErrorCode ierr; 3016 Mat B_mpi; 3017 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3018 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3019 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3020 PetscInt M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j; 3021 PetscInt len,proc,*dnz,*onz; 3022 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3023 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3024 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3025 MPI_Status *status; 3026 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3027 PetscBT lnkbt; 3028 Mat_Merge_SeqsToMPI *merge; 3029 PetscObjectContainer container; 3030 3031 PetscFunctionBegin; 3032 if (!logkey_seqstompisym) { 3033 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3034 } 3035 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3036 3037 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3038 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3039 3040 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3041 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3042 3043 /* determine row ownership */ 3044 /*---------------------------------------------------------*/ 3045 ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr); 3046 if (m == PETSC_DECIDE) { 3047 ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr); 3048 } else { 3049 ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 3050 } 3051 ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr); 3052 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3053 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3054 3055 if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); } 3056 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 3057 3058 /* determine the number of messages to send, their lengths */ 3059 /*---------------------------------------------------------*/ 3060 len_s = merge->len_s; 3061 3062 len = 0; /* length of buf_si[] */ 3063 merge->nsend = 0; 3064 for (proc=0; proc<size; proc++){ 3065 len_si[proc] = 0; 3066 if (proc == rank){ 3067 len_s[proc] = 0; 3068 } else { 3069 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3070 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3071 } 3072 if (len_s[proc]) { 3073 merge->nsend++; 3074 nrows = 0; 3075 for (i=owners[proc]; i<owners[proc+1]; i++){ 3076 if (ai[i+1] > ai[i]) nrows++; 3077 } 3078 len_si[proc] = 2*(nrows+1); 3079 len += len_si[proc]; 3080 } 3081 } 3082 3083 /* determine the number and length of messages to receive for ij-structure */ 3084 /*-------------------------------------------------------------------------*/ 3085 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3086 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3087 3088 /* post the Irecv of j-structure */ 3089 /*-------------------------------*/ 3090 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr); 3091 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3092 3093 /* post the Isend of j-structure */ 3094 /*--------------------------------*/ 3095 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3096 sj_waits = si_waits + merge->nsend; 3097 3098 for (proc=0, k=0; proc<size; proc++){ 3099 if (!len_s[proc]) continue; 3100 i = owners[proc]; 3101 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3102 k++; 3103 } 3104 3105 /* receives and sends of j-structure are complete */ 3106 /*------------------------------------------------*/ 3107 ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr); 3108 ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr); 3109 3110 /* send and recv i-structure */ 3111 /*---------------------------*/ 3112 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr); 3113 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3114 3115 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3116 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3117 for (proc=0,k=0; proc<size; proc++){ 3118 if (!len_s[proc]) continue; 3119 /* form outgoing message for i-structure: 3120 buf_si[0]: nrows to be sent 3121 [1:nrows]: row index (global) 3122 [nrows+1:2*nrows+1]: i-structure index 3123 */ 3124 /*-------------------------------------------*/ 3125 nrows = len_si[proc]/2 - 1; 3126 buf_si_i = buf_si + nrows+1; 3127 buf_si[0] = nrows; 3128 buf_si_i[0] = 0; 3129 nrows = 0; 3130 for (i=owners[proc]; i<owners[proc+1]; i++){ 3131 anzi = ai[i+1] - ai[i]; 3132 if (anzi) { 3133 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3134 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3135 nrows++; 3136 } 3137 } 3138 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3139 k++; 3140 buf_si += len_si[proc]; 3141 } 3142 3143 ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr); 3144 ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr); 3145 3146 ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 3147 for (i=0; i<merge->nrecv; i++){ 3148 ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 3149 } 3150 3151 ierr = PetscFree(len_si);CHKERRQ(ierr); 3152 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3153 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 3154 ierr = PetscFree(si_waits);CHKERRQ(ierr); 3155 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3156 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3157 ierr = PetscFree(status);CHKERRQ(ierr); 3158 3159 /* compute a local seq matrix in each processor */ 3160 /*----------------------------------------------*/ 3161 /* allocate bi array and free space for accumulating nonzero column info */ 3162 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3163 bi[0] = 0; 3164 3165 /* create and initialize a linked list */ 3166 nlnk = N+1; 3167 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3168 3169 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3170 len = 0; 3171 len = ai[owners[rank+1]] - ai[owners[rank]]; 3172 ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3173 current_space = free_space; 3174 3175 /* determine symbolic info for each local row */ 3176 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3177 nextrow = buf_ri_k + merge->nrecv; 3178 nextai = nextrow + merge->nrecv; 3179 for (k=0; k<merge->nrecv; k++){ 3180 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3181 nrows = *buf_ri_k[k]; 3182 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 3183 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3184 } 3185 3186 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3187 len = 0; 3188 for (i=0;i<m;i++) { 3189 bnzi = 0; 3190 /* add local non-zero cols of this proc's seqmat into lnk */ 3191 arow = owners[rank] + i; 3192 anzi = ai[arow+1] - ai[arow]; 3193 aj = a->j + ai[arow]; 3194 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3195 bnzi += nlnk; 3196 /* add received col data into lnk */ 3197 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3198 if (i == *nextrow[k]) { /* i-th row */ 3199 anzi = *(nextai[k]+1) - *nextai[k]; 3200 aj = buf_rj[k] + *nextai[k]; 3201 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3202 bnzi += nlnk; 3203 nextrow[k]++; nextai[k]++; 3204 } 3205 } 3206 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3207 3208 /* if free space is not available, make more free space */ 3209 if (current_space->local_remaining<bnzi) { 3210 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3211 nspacedouble++; 3212 } 3213 /* copy data into free space, then initialize lnk */ 3214 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3215 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3216 3217 current_space->array += bnzi; 3218 current_space->local_used += bnzi; 3219 current_space->local_remaining -= bnzi; 3220 3221 bi[i+1] = bi[i] + bnzi; 3222 } 3223 3224 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3225 3226 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3227 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3228 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3229 3230 /* create symbolic parallel matrix B_mpi */ 3231 /*---------------------------------------*/ 3232 if (n==PETSC_DECIDE) { 3233 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr); 3234 } else { 3235 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr); 3236 } 3237 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3238 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3239 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3240 3241 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3242 B_mpi->assembled = PETSC_FALSE; 3243 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3244 merge->bi = bi; 3245 merge->bj = bj; 3246 merge->buf_ri = buf_ri; 3247 merge->buf_rj = buf_rj; 3248 merge->coi = PETSC_NULL; 3249 merge->coj = PETSC_NULL; 3250 merge->owners_co = PETSC_NULL; 3251 3252 /* attach the supporting struct to B_mpi for reuse */ 3253 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3254 ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr); 3255 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3256 *mpimat = B_mpi; 3257 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3258 PetscFunctionReturn(0); 3259 } 3260 3261 static PetscEvent logkey_seqstompi = 0; 3262 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3263 { 3264 PetscErrorCode ierr; 3265 3266 PetscFunctionBegin; 3267 if (!logkey_seqstompi) { 3268 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3269 } 3270 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3271 if (scall == MAT_INITIAL_MATRIX){ 3272 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3273 } 3274 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3275 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3276 PetscFunctionReturn(0); 3277 } 3278 static PetscEvent logkey_getlocalmat = 0; 3279 #undef __FUNCT__ 3280 #define __FUNCT__ "MatGetLocalMat" 3281 /*@C 3282 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3283 3284 Not Collective 3285 3286 Input Parameters: 3287 + A - the matrix 3288 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3289 3290 Output Parameter: 3291 . A_loc - the local sequential matrix generated 3292 3293 Level: developer 3294 3295 @*/ 3296 PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3297 { 3298 PetscErrorCode ierr; 3299 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3300 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3301 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3302 PetscScalar *aa=a->a,*ba=b->a,*ca; 3303 PetscInt am=A->m,i,j,k,cstart=mpimat->cstart; 3304 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3305 3306 PetscFunctionBegin; 3307 if (!logkey_getlocalmat) { 3308 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3309 } 3310 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3311 if (scall == MAT_INITIAL_MATRIX){ 3312 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 3313 ci[0] = 0; 3314 for (i=0; i<am; i++){ 3315 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3316 } 3317 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3318 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 3319 k = 0; 3320 for (i=0; i<am; i++) { 3321 ncols_o = bi[i+1] - bi[i]; 3322 ncols_d = ai[i+1] - ai[i]; 3323 /* off-diagonal portion of A */ 3324 for (jo=0; jo<ncols_o; jo++) { 3325 col = cmap[*bj]; 3326 if (col >= cstart) break; 3327 cj[k] = col; bj++; 3328 ca[k++] = *ba++; 3329 } 3330 /* diagonal portion of A */ 3331 for (j=0; j<ncols_d; j++) { 3332 cj[k] = cstart + *aj++; 3333 ca[k++] = *aa++; 3334 } 3335 /* off-diagonal portion of A */ 3336 for (j=jo; j<ncols_o; j++) { 3337 cj[k] = cmap[*bj++]; 3338 ca[k++] = *ba++; 3339 } 3340 } 3341 /* put together the new matrix */ 3342 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 3343 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3344 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3345 mat = (Mat_SeqAIJ*)(*A_loc)->data; 3346 mat->freedata = PETSC_TRUE; 3347 mat->nonew = 0; 3348 } else if (scall == MAT_REUSE_MATRIX){ 3349 mat=(Mat_SeqAIJ*)(*A_loc)->data; 3350 ci = mat->i; cj = mat->j; ca = mat->a; 3351 for (i=0; i<am; i++) { 3352 /* off-diagonal portion of A */ 3353 ncols_o = bi[i+1] - bi[i]; 3354 for (jo=0; jo<ncols_o; jo++) { 3355 col = cmap[*bj]; 3356 if (col >= cstart) break; 3357 *ca++ = *ba++; bj++; 3358 } 3359 /* diagonal portion of A */ 3360 ncols_d = ai[i+1] - ai[i]; 3361 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 3362 /* off-diagonal portion of A */ 3363 for (j=jo; j<ncols_o; j++) { 3364 *ca++ = *ba++; bj++; 3365 } 3366 } 3367 } else { 3368 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3369 } 3370 3371 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3372 PetscFunctionReturn(0); 3373 } 3374 3375 static PetscEvent logkey_getlocalmatcondensed = 0; 3376 #undef __FUNCT__ 3377 #define __FUNCT__ "MatGetLocalMatCondensed" 3378 /*@C 3379 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 3380 3381 Not Collective 3382 3383 Input Parameters: 3384 + A - the matrix 3385 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3386 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 3387 3388 Output Parameter: 3389 . A_loc - the local sequential matrix generated 3390 3391 Level: developer 3392 3393 @*/ 3394 PetscErrorCode MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 3395 { 3396 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3397 PetscErrorCode ierr; 3398 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 3399 IS isrowa,iscola; 3400 Mat *aloc; 3401 3402 PetscFunctionBegin; 3403 if (!logkey_getlocalmatcondensed) { 3404 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE); 3405 } 3406 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3407 if (!row){ 3408 start = a->rstart; end = a->rend; 3409 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 3410 } else { 3411 isrowa = *row; 3412 } 3413 if (!col){ 3414 start = a->cstart; 3415 cmap = a->garray; 3416 nzA = a->A->n; 3417 nzB = a->B->n; 3418 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3419 ncols = 0; 3420 for (i=0; i<nzB; i++) { 3421 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3422 else break; 3423 } 3424 imark = i; 3425 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 3426 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 3427 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 3428 ierr = PetscFree(idx);CHKERRQ(ierr); 3429 } else { 3430 iscola = *col; 3431 } 3432 if (scall != MAT_INITIAL_MATRIX){ 3433 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 3434 aloc[0] = *A_loc; 3435 } 3436 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 3437 *A_loc = aloc[0]; 3438 ierr = PetscFree(aloc);CHKERRQ(ierr); 3439 if (!row){ 3440 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 3441 } 3442 if (!col){ 3443 ierr = ISDestroy(iscola);CHKERRQ(ierr); 3444 } 3445 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3446 PetscFunctionReturn(0); 3447 } 3448 3449 static PetscEvent logkey_GetBrowsOfAcols = 0; 3450 #undef __FUNCT__ 3451 #define __FUNCT__ "MatGetBrowsOfAcols" 3452 /*@C 3453 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 3454 3455 Collective on Mat 3456 3457 Input Parameters: 3458 + A,B - the matrices in mpiaij format 3459 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3460 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 3461 3462 Output Parameter: 3463 + rowb, colb - index sets of rows and columns of B to extract 3464 . brstart - row index of B_seq from which next B->m rows are taken from B's local rows 3465 - B_seq - the sequential matrix generated 3466 3467 Level: developer 3468 3469 @*/ 3470 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 3471 { 3472 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3473 PetscErrorCode ierr; 3474 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 3475 IS isrowb,iscolb; 3476 Mat *bseq; 3477 3478 PetscFunctionBegin; 3479 if (a->cstart != b->rstart || a->cend != b->rend){ 3480 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend); 3481 } 3482 if (!logkey_GetBrowsOfAcols) { 3483 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 3484 } 3485 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3486 3487 if (scall == MAT_INITIAL_MATRIX){ 3488 start = a->cstart; 3489 cmap = a->garray; 3490 nzA = a->A->n; 3491 nzB = a->B->n; 3492 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3493 ncols = 0; 3494 for (i=0; i<nzB; i++) { /* row < local row index */ 3495 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3496 else break; 3497 } 3498 imark = i; 3499 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 3500 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 3501 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 3502 ierr = PetscFree(idx);CHKERRQ(ierr); 3503 *brstart = imark; 3504 ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr); 3505 } else { 3506 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 3507 isrowb = *rowb; iscolb = *colb; 3508 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 3509 bseq[0] = *B_seq; 3510 } 3511 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 3512 *B_seq = bseq[0]; 3513 ierr = PetscFree(bseq);CHKERRQ(ierr); 3514 if (!rowb){ 3515 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 3516 } else { 3517 *rowb = isrowb; 3518 } 3519 if (!colb){ 3520 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 3521 } else { 3522 *colb = iscolb; 3523 } 3524 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3525 PetscFunctionReturn(0); 3526 } 3527 3528 static PetscEvent logkey_GetBrowsOfAocols = 0; 3529 #undef __FUNCT__ 3530 #define __FUNCT__ "MatGetBrowsOfAoCols" 3531 /*@C 3532 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 3533 of the OFF-DIAGONAL portion of local A 3534 3535 Collective on Mat 3536 3537 Input Parameters: 3538 + A,B - the matrices in mpiaij format 3539 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3540 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 3541 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 3542 3543 Output Parameter: 3544 + B_oth - the sequential matrix generated 3545 3546 Level: developer 3547 3548 @*/ 3549 PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 3550 { 3551 VecScatter_MPI_General *gen_to,*gen_from; 3552 PetscErrorCode ierr; 3553 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3554 Mat_SeqAIJ *b_oth; 3555 VecScatter ctx=a->Mvctx; 3556 MPI_Comm comm=ctx->comm; 3557 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank; 3558 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj; 3559 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 3560 PetscInt i,k,l,nrecvs,nsends,nrows,*rrow,*srow,*rstarts,*rstartsj,*sstarts,*sstartsj,len; 3561 MPI_Request *rwaits,*swaits; 3562 MPI_Status *sstatus,rstatus; 3563 PetscInt *cols; 3564 PetscScalar *vals; 3565 PetscMPIInt j; 3566 3567 PetscFunctionBegin; 3568 if (a->cstart != b->rstart || a->cend != b->rend){ 3569 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend); 3570 } 3571 if (!logkey_GetBrowsOfAocols) { 3572 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE); 3573 } 3574 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3575 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3576 3577 gen_to = (VecScatter_MPI_General*)ctx->todata; 3578 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 3579 rvalues = gen_from->values; /* holds the length of sending row */ 3580 svalues = gen_to->values; /* holds the length of receiving row */ 3581 nrecvs = gen_from->n; 3582 nsends = gen_to->n; 3583 rwaits = gen_from->requests; 3584 swaits = gen_to->requests; 3585 rrow = gen_from->indices; /* local row index to be received */ 3586 srow = gen_to->indices; /* local row index to be sent */ 3587 rstarts = gen_from->starts; 3588 sstarts = gen_to->starts; 3589 rprocs = gen_from->procs; 3590 sprocs = gen_to->procs; 3591 sstatus = gen_to->sstatus; 3592 3593 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 3594 if (scall == MAT_INITIAL_MATRIX){ 3595 /* i-array */ 3596 /*---------*/ 3597 /* post receives */ 3598 for (i=0; i<nrecvs; i++){ 3599 rowlen = (PetscInt*)rvalues + rstarts[i]; 3600 nrows = rstarts[i+1]-rstarts[i]; 3601 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3602 } 3603 3604 /* pack the outgoing message */ 3605 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 3606 rstartsj = sstartsj + nsends +1; 3607 sstartsj[0] = 0; rstartsj[0] = 0; 3608 len = 0; /* total length of j or a array to be sent */ 3609 k = 0; 3610 for (i=0; i<nsends; i++){ 3611 rowlen = (PetscInt*)svalues + sstarts[i]; 3612 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3613 for (j=0; j<nrows; j++) { 3614 row = srow[k] + b->rowners[rank]; /* global row idx */ 3615 ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 3616 len += rowlen[j]; 3617 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 3618 k++; 3619 } 3620 ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3621 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 3622 } 3623 /* recvs and sends of i-array are completed */ 3624 i = nrecvs; 3625 while (i--) { 3626 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3627 } 3628 if (nsends) { 3629 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr); 3630 } 3631 /* allocate buffers for sending j and a arrays */ 3632 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 3633 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 3634 3635 /* create i-array of B_oth */ 3636 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 3637 b_othi[0] = 0; 3638 len = 0; /* total length of j or a array to be received */ 3639 k = 0; 3640 for (i=0; i<nrecvs; i++){ 3641 rowlen = (PetscInt*)rvalues + rstarts[i]; 3642 nrows = rstarts[i+1]-rstarts[i]; 3643 for (j=0; j<nrows; j++) { 3644 b_othi[k+1] = b_othi[k] + rowlen[j]; 3645 len += rowlen[j]; k++; 3646 } 3647 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 3648 } 3649 3650 /* allocate space for j and a arrrays of B_oth */ 3651 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 3652 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr); 3653 3654 /* j-array */ 3655 /*---------*/ 3656 /* post receives of j-array */ 3657 for (i=0; i<nrecvs; i++){ 3658 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3659 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3660 } 3661 k = 0; 3662 for (i=0; i<nsends; i++){ 3663 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3664 bufJ = bufj+sstartsj[i]; 3665 for (j=0; j<nrows; j++) { 3666 row = srow[k++] + b->rowners[rank]; /* global row idx */ 3667 ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3668 for (l=0; l<ncols; l++){ 3669 *bufJ++ = cols[l]; 3670 } 3671 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3672 } 3673 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3674 } 3675 3676 /* recvs and sends of j-array are completed */ 3677 i = nrecvs; 3678 while (i--) { 3679 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3680 } 3681 if (nsends) { 3682 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr); 3683 } 3684 } else if (scall == MAT_REUSE_MATRIX){ 3685 sstartsj = *startsj; 3686 rstartsj = sstartsj + nsends +1; 3687 bufa = *bufa_ptr; 3688 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 3689 b_otha = b_oth->a; 3690 } else { 3691 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 3692 } 3693 3694 /* a-array */ 3695 /*---------*/ 3696 /* post receives of a-array */ 3697 for (i=0; i<nrecvs; i++){ 3698 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3699 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3700 } 3701 k = 0; 3702 for (i=0; i<nsends; i++){ 3703 nrows = sstarts[i+1]-sstarts[i]; 3704 bufA = bufa+sstartsj[i]; 3705 for (j=0; j<nrows; j++) { 3706 row = srow[k++] + b->rowners[rank]; /* global row idx */ 3707 ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3708 for (l=0; l<ncols; l++){ 3709 *bufA++ = vals[l]; 3710 } 3711 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3712 3713 } 3714 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3715 } 3716 /* recvs and sends of a-array are completed */ 3717 i = nrecvs; 3718 while (i--) { 3719 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3720 } 3721 if (nsends) { 3722 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr); 3723 } 3724 3725 if (scall == MAT_INITIAL_MATRIX){ 3726 /* put together the new matrix */ 3727 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 3728 3729 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3730 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3731 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 3732 b_oth->freedata = PETSC_TRUE; 3733 b_oth->nonew = 0; 3734 3735 ierr = PetscFree(bufj);CHKERRQ(ierr); 3736 if (!startsj || !bufa_ptr){ 3737 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 3738 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 3739 } else { 3740 *startsj = sstartsj; 3741 *bufa_ptr = bufa; 3742 } 3743 } 3744 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3745 3746 PetscFunctionReturn(0); 3747 } 3748 3749 /*MC 3750 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 3751 3752 Options Database Keys: 3753 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 3754 3755 Level: beginner 3756 3757 .seealso: MatCreateMPIAIJ 3758 M*/ 3759 3760 EXTERN_C_BEGIN 3761 #undef __FUNCT__ 3762 #define __FUNCT__ "MatCreate_MPIAIJ" 3763 PetscErrorCode MatCreate_MPIAIJ(Mat B) 3764 { 3765 Mat_MPIAIJ *b; 3766 PetscErrorCode ierr; 3767 PetscInt i; 3768 PetscMPIInt size; 3769 3770 PetscFunctionBegin; 3771 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 3772 3773 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 3774 B->data = (void*)b; 3775 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 3776 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3777 B->factor = 0; 3778 B->bs = 1; 3779 B->assembled = PETSC_FALSE; 3780 B->mapping = 0; 3781 3782 B->insertmode = NOT_SET_VALUES; 3783 b->size = size; 3784 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 3785 3786 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 3787 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 3788 3789 /* the information in the maps duplicates the information computed below, eventually 3790 we should remove the duplicate information that is not contained in the maps */ 3791 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 3792 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 3793 3794 /* build local table of row and column ownerships */ 3795 ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 3796 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 3797 b->cowners = b->rowners + b->size + 2; 3798 ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3799 b->rowners[0] = 0; 3800 for (i=2; i<=b->size; i++) { 3801 b->rowners[i] += b->rowners[i-1]; 3802 } 3803 b->rstart = b->rowners[b->rank]; 3804 b->rend = b->rowners[b->rank+1]; 3805 ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3806 b->cowners[0] = 0; 3807 for (i=2; i<=b->size; i++) { 3808 b->cowners[i] += b->cowners[i-1]; 3809 } 3810 b->cstart = b->cowners[b->rank]; 3811 b->cend = b->cowners[b->rank+1]; 3812 3813 /* build cache for off array entries formed */ 3814 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 3815 b->donotstash = PETSC_FALSE; 3816 b->colmap = 0; 3817 b->garray = 0; 3818 b->roworiented = PETSC_TRUE; 3819 3820 /* stuff used for matrix vector multiply */ 3821 b->lvec = PETSC_NULL; 3822 b->Mvctx = PETSC_NULL; 3823 3824 /* stuff for MatGetRow() */ 3825 b->rowindices = 0; 3826 b->rowvalues = 0; 3827 b->getrowactive = PETSC_FALSE; 3828 3829 /* Explicitly create 2 MATSEQAIJ matrices. */ 3830 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 3831 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3832 PetscLogObjectParent(B,b->A); 3833 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 3834 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3835 PetscLogObjectParent(B,b->B); 3836 3837 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3838 "MatStoreValues_MPIAIJ", 3839 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 3840 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3841 "MatRetrieveValues_MPIAIJ", 3842 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 3843 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3844 "MatGetDiagonalBlock_MPIAIJ", 3845 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 3846 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3847 "MatIsTranspose_MPIAIJ", 3848 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 3849 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 3850 "MatMPIAIJSetPreallocation_MPIAIJ", 3851 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 3852 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 3853 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 3854 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 3855 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3856 "MatDiagonalScaleLocal_MPIAIJ", 3857 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 3858 PetscFunctionReturn(0); 3859 } 3860 EXTERN_C_END 3861