1 2 #include "src/mat/impls/aij/mpi/mpiaij.h" 3 #include "src/inline/spops.h" 4 5 /* 6 Local utility routine that creates a mapping from the global column 7 number to the local number in the off-diagonal part of the local 8 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 9 a slightly higher hash table cost; without it it is not scalable (each processor 10 has an order N integer array but is fast to acess. 11 */ 12 #undef __FUNCT__ 13 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 14 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 15 { 16 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 17 PetscErrorCode ierr; 18 PetscInt n = aij->B->n,i; 19 20 PetscFunctionBegin; 21 #if defined (PETSC_USE_CTABLE) 22 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 23 for (i=0; i<n; i++){ 24 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 25 } 26 #else 27 ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 28 PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt)); 29 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr); 30 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 31 #endif 32 PetscFunctionReturn(0); 33 } 34 35 #define CHUNKSIZE 15 36 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 37 { \ 38 \ 39 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 40 rmax = aimax[row]; nrow = ailen[row]; \ 41 col1 = col - shift; \ 42 \ 43 low = 0; high = nrow; \ 44 while (high-low > 5) { \ 45 t = (low+high)/2; \ 46 if (rp[t] > col) high = t; \ 47 else low = t; \ 48 } \ 49 for (_i=low; _i<high; _i++) { \ 50 if (rp[_i] > col1) break; \ 51 if (rp[_i] == col1) { \ 52 if (addv == ADD_VALUES) ap[_i] += value; \ 53 else ap[_i] = value; \ 54 goto a_noinsert; \ 55 } \ 56 } \ 57 if (value == 0.0 && ignorezeroentries) goto a_noinsert; \ 58 if (nonew == 1) goto a_noinsert; \ 59 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 60 if (nrow >= rmax) { \ 61 /* there is no extra room in row, therefore enlarge */ \ 62 PetscInt new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \ 63 PetscScalar *new_a; \ 64 \ 65 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \ 66 \ 67 /* malloc new storage space */ \ 68 len = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(am+1)*sizeof(PetscInt); \ 69 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 70 new_j = (PetscInt*)(new_a + new_nz); \ 71 new_i = new_j + new_nz; \ 72 \ 73 /* copy over old data into new slots */ \ 74 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \ 75 for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 76 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \ 77 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 78 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 79 len*sizeof(PetscInt));CHKERRQ(ierr); \ 80 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 81 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 82 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 83 /* free up old matrix storage */ \ 84 \ 85 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 86 if (!a->singlemalloc) { \ 87 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 88 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 89 } \ 90 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 91 a->singlemalloc = PETSC_TRUE; \ 92 \ 93 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 94 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 95 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \ 96 a->maxnz += CHUNKSIZE; \ 97 a->reallocs++; \ 98 } \ 99 N = nrow++ - 1; a->nz++; \ 100 /* shift up all the later entries in this row */ \ 101 for (ii=N; ii>=_i; ii--) { \ 102 rp[ii+1] = rp[ii]; \ 103 ap[ii+1] = ap[ii]; \ 104 } \ 105 rp[_i] = col1; \ 106 ap[_i] = value; \ 107 a_noinsert: ; \ 108 ailen[row] = nrow; \ 109 } 110 111 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 112 { \ 113 \ 114 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 115 rmax = bimax[row]; nrow = bilen[row]; \ 116 col1 = col - shift; \ 117 \ 118 low = 0; high = nrow; \ 119 while (high-low > 5) { \ 120 t = (low+high)/2; \ 121 if (rp[t] > col) high = t; \ 122 else low = t; \ 123 } \ 124 for (_i=low; _i<high; _i++) { \ 125 if (rp[_i] > col1) break; \ 126 if (rp[_i] == col1) { \ 127 if (addv == ADD_VALUES) ap[_i] += value; \ 128 else ap[_i] = value; \ 129 goto b_noinsert; \ 130 } \ 131 } \ 132 if (value == 0.0 && ignorezeroentries) goto b_noinsert; \ 133 if (nonew == 1) goto b_noinsert; \ 134 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 135 if (nrow >= rmax) { \ 136 /* there is no extra room in row, therefore enlarge */ \ 137 PetscInt new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 138 PetscScalar *new_a; \ 139 \ 140 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \ 141 \ 142 /* malloc new storage space */ \ 143 len = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(bm+1)*sizeof(PetscInt); \ 144 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 145 new_j = (PetscInt*)(new_a + new_nz); \ 146 new_i = new_j + new_nz; \ 147 \ 148 /* copy over old data into new slots */ \ 149 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 150 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 151 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \ 152 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 153 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 154 len*sizeof(PetscInt));CHKERRQ(ierr); \ 155 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 156 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 157 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 158 /* free up old matrix storage */ \ 159 \ 160 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 161 if (!b->singlemalloc) { \ 162 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 163 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 164 } \ 165 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 166 b->singlemalloc = PETSC_TRUE; \ 167 \ 168 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 169 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 170 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \ 171 b->maxnz += CHUNKSIZE; \ 172 b->reallocs++; \ 173 } \ 174 N = nrow++ - 1; b->nz++; \ 175 /* shift up all the later entries in this row */ \ 176 for (ii=N; ii>=_i; ii--) { \ 177 rp[ii+1] = rp[ii]; \ 178 ap[ii+1] = ap[ii]; \ 179 } \ 180 rp[_i] = col1; \ 181 ap[_i] = value; \ 182 b_noinsert: ; \ 183 bilen[row] = nrow; \ 184 } 185 186 #undef __FUNCT__ 187 #define __FUNCT__ "MatSetValues_MPIAIJ" 188 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 189 { 190 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 191 PetscScalar value; 192 PetscErrorCode ierr; 193 PetscInt i,j,rstart = aij->rstart,rend = aij->rend; 194 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 195 PetscTruth roworiented = aij->roworiented; 196 197 /* Some Variables required in the macro */ 198 Mat A = aij->A; 199 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 200 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 201 PetscScalar *aa = a->a; 202 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 203 Mat B = aij->B; 204 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 205 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 206 PetscScalar *ba = b->a; 207 208 PetscInt *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 209 PetscInt nonew = a->nonew,shift=0; 210 PetscScalar *ap; 211 212 PetscFunctionBegin; 213 for (i=0; i<m; i++) { 214 if (im[i] < 0) continue; 215 #if defined(PETSC_USE_DEBUG) 216 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 217 #endif 218 if (im[i] >= rstart && im[i] < rend) { 219 row = im[i] - rstart; 220 for (j=0; j<n; j++) { 221 if (in[j] >= cstart && in[j] < cend){ 222 col = in[j] - cstart; 223 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 224 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 225 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 226 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 227 } else if (in[j] < 0) continue; 228 #if defined(PETSC_USE_DEBUG) 229 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);} 230 #endif 231 else { 232 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 233 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 234 if (mat->was_assembled) { 235 if (!aij->colmap) { 236 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 237 } 238 #if defined (PETSC_USE_CTABLE) 239 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 240 col--; 241 #else 242 col = aij->colmap[in[j]] - 1; 243 #endif 244 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 245 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 246 col = in[j]; 247 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 248 B = aij->B; 249 b = (Mat_SeqAIJ*)B->data; 250 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 251 ba = b->a; 252 } 253 } else col = in[j]; 254 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 255 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 256 } 257 } 258 } else { 259 if (!aij->donotstash) { 260 if (roworiented) { 261 if (ignorezeroentries && v[i*n] == 0.0) continue; 262 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 263 } else { 264 if (ignorezeroentries && v[i] == 0.0) continue; 265 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 266 } 267 } 268 } 269 } 270 PetscFunctionReturn(0); 271 } 272 273 #undef __FUNCT__ 274 #define __FUNCT__ "MatGetValues_MPIAIJ" 275 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 276 { 277 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 278 PetscErrorCode ierr; 279 PetscInt i,j,rstart = aij->rstart,rend = aij->rend; 280 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 281 282 PetscFunctionBegin; 283 for (i=0; i<m; i++) { 284 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 285 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1); 286 if (idxm[i] >= rstart && idxm[i] < rend) { 287 row = idxm[i] - rstart; 288 for (j=0; j<n; j++) { 289 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 290 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1); 291 if (idxn[j] >= cstart && idxn[j] < cend){ 292 col = idxn[j] - cstart; 293 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 294 } else { 295 if (!aij->colmap) { 296 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 297 } 298 #if defined (PETSC_USE_CTABLE) 299 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 300 col --; 301 #else 302 col = aij->colmap[idxn[j]] - 1; 303 #endif 304 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 305 else { 306 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 307 } 308 } 309 } 310 } else { 311 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 312 } 313 } 314 PetscFunctionReturn(0); 315 } 316 317 #undef __FUNCT__ 318 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 319 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 320 { 321 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 322 PetscErrorCode ierr; 323 PetscInt nstash,reallocs; 324 InsertMode addv; 325 326 PetscFunctionBegin; 327 if (aij->donotstash) { 328 PetscFunctionReturn(0); 329 } 330 331 /* make sure all processors are either in INSERTMODE or ADDMODE */ 332 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 333 if (addv == (ADD_VALUES|INSERT_VALUES)) { 334 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 335 } 336 mat->insertmode = addv; /* in case this processor had no cache */ 337 338 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 339 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 340 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs); 341 PetscFunctionReturn(0); 342 } 343 344 #undef __FUNCT__ 345 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 346 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 347 { 348 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 349 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 350 PetscErrorCode ierr; 351 PetscMPIInt n; 352 PetscInt i,j,rstart,ncols,flg; 353 PetscInt *row,*col,other_disassembled; 354 PetscScalar *val; 355 InsertMode addv = mat->insertmode; 356 357 PetscFunctionBegin; 358 if (!aij->donotstash) { 359 while (1) { 360 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 361 if (!flg) break; 362 363 for (i=0; i<n;) { 364 /* Now identify the consecutive vals belonging to the same row */ 365 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 366 if (j < n) ncols = j-i; 367 else ncols = n-i; 368 /* Now assemble all these values with a single function call */ 369 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 370 i = j; 371 } 372 } 373 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 374 } 375 a->compressedrow.use = PETSC_FALSE; 376 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 377 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 378 379 /* determine if any processor has disassembled, if so we must 380 also disassemble ourselfs, in order that we may reassemble. */ 381 /* 382 if nonzero structure of submatrix B cannot change then we know that 383 no processor disassembled thus we can skip this stuff 384 */ 385 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 386 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 387 if (mat->was_assembled && !other_disassembled) { 388 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 389 /* reaccess the b because aij->B was changed */ 390 b = (Mat_SeqAIJ *)aij->B->data; 391 } 392 } 393 394 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 395 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 396 } 397 398 b->inode.use = PETSC_FALSE; 399 b->compressedrow.use = PETSC_TRUE; 400 if (aij->B->bs !=1){ 401 /* printf("aij->B->bs: %d, mat: bs=%d, dim=%d, %d, %d, %d\n",aij->B->bs,mat->bs,mat->m,mat->n,mat->M,mat->N);*/ 402 aij->B->bs = 1; /* why it could !=1 ? */ 403 } 404 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 405 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 406 407 if (aij->rowvalues) { 408 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 409 aij->rowvalues = 0; 410 } 411 412 /* used by MatAXPY() */ 413 a->xtoy = 0; b->xtoy = 0; 414 a->XtoY = 0; b->XtoY = 0; 415 416 PetscFunctionReturn(0); 417 } 418 419 #undef __FUNCT__ 420 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 421 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 422 { 423 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 424 PetscErrorCode ierr; 425 426 PetscFunctionBegin; 427 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 428 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 429 PetscFunctionReturn(0); 430 } 431 432 #undef __FUNCT__ 433 #define __FUNCT__ "MatZeroRows_MPIAIJ" 434 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag) 435 { 436 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 437 PetscErrorCode ierr; 438 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = A->tag; 439 PetscInt i,N,*rows,*owners = l->rowners; 440 PetscInt *nprocs,j,idx,nsends,row; 441 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 442 PetscInt *rvalues,count,base,slen,*source; 443 PetscInt *lens,*lrows,*values,rstart=l->rstart; 444 MPI_Comm comm = A->comm; 445 MPI_Request *send_waits,*recv_waits; 446 MPI_Status recv_status,*send_status; 447 IS istmp; 448 PetscTruth found; 449 450 PetscFunctionBegin; 451 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 452 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 453 454 /* first count number of contributors to each processor */ 455 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 456 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 457 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 458 for (i=0; i<N; i++) { 459 idx = rows[i]; 460 found = PETSC_FALSE; 461 for (j=0; j<size; j++) { 462 if (idx >= owners[j] && idx < owners[j+1]) { 463 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 464 } 465 } 466 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 467 } 468 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 469 470 /* inform other processors of number of messages and max length*/ 471 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 472 473 /* post receives: */ 474 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 475 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 476 for (i=0; i<nrecvs; i++) { 477 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 478 } 479 480 /* do sends: 481 1) starts[i] gives the starting index in svalues for stuff going to 482 the ith processor 483 */ 484 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 485 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 486 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 487 starts[0] = 0; 488 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 489 for (i=0; i<N; i++) { 490 svalues[starts[owner[i]]++] = rows[i]; 491 } 492 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 493 494 starts[0] = 0; 495 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 496 count = 0; 497 for (i=0; i<size; i++) { 498 if (nprocs[2*i+1]) { 499 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 500 } 501 } 502 ierr = PetscFree(starts);CHKERRQ(ierr); 503 504 base = owners[rank]; 505 506 /* wait on receives */ 507 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 508 source = lens + nrecvs; 509 count = nrecvs; slen = 0; 510 while (count) { 511 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 512 /* unpack receives into our local space */ 513 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 514 source[imdex] = recv_status.MPI_SOURCE; 515 lens[imdex] = n; 516 slen += n; 517 count--; 518 } 519 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 520 521 /* move the data into the send scatter */ 522 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 523 count = 0; 524 for (i=0; i<nrecvs; i++) { 525 values = rvalues + i*nmax; 526 for (j=0; j<lens[i]; j++) { 527 lrows[count++] = values[j] - base; 528 } 529 } 530 ierr = PetscFree(rvalues);CHKERRQ(ierr); 531 ierr = PetscFree(lens);CHKERRQ(ierr); 532 ierr = PetscFree(owner);CHKERRQ(ierr); 533 ierr = PetscFree(nprocs);CHKERRQ(ierr); 534 535 /* actually zap the local rows */ 536 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 537 PetscLogObjectParent(A,istmp); 538 539 /* 540 Zero the required rows. If the "diagonal block" of the matrix 541 is square and the user wishes to set the diagonal we use seperate 542 code so that MatSetValues() is not called for each diagonal allocating 543 new memory, thus calling lots of mallocs and slowing things down. 544 545 Contributed by: Mathew Knepley 546 */ 547 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 548 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 549 if (diag && (l->A->M == l->A->N)) { 550 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 551 } else if (diag) { 552 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 553 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 554 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 555 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 556 } 557 for (i = 0; i < slen; i++) { 558 row = lrows[i] + rstart; 559 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 560 } 561 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 562 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 563 } else { 564 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 565 } 566 ierr = ISDestroy(istmp);CHKERRQ(ierr); 567 ierr = PetscFree(lrows);CHKERRQ(ierr); 568 569 /* wait on sends */ 570 if (nsends) { 571 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 572 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 573 ierr = PetscFree(send_status);CHKERRQ(ierr); 574 } 575 ierr = PetscFree(send_waits);CHKERRQ(ierr); 576 ierr = PetscFree(svalues);CHKERRQ(ierr); 577 578 PetscFunctionReturn(0); 579 } 580 581 #undef __FUNCT__ 582 #define __FUNCT__ "MatMult_MPIAIJ" 583 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 584 { 585 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 586 PetscErrorCode ierr; 587 PetscInt nt; 588 589 PetscFunctionBegin; 590 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 591 if (nt != A->n) { 592 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt); 593 } 594 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 595 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 596 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 597 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 598 PetscFunctionReturn(0); 599 } 600 601 #undef __FUNCT__ 602 #define __FUNCT__ "MatMultAdd_MPIAIJ" 603 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 604 { 605 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 606 PetscErrorCode ierr; 607 608 PetscFunctionBegin; 609 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 610 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 611 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 612 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 613 PetscFunctionReturn(0); 614 } 615 616 #undef __FUNCT__ 617 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 618 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 619 { 620 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 621 PetscErrorCode ierr; 622 623 PetscFunctionBegin; 624 /* do nondiagonal part */ 625 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 626 /* send it on its way */ 627 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 628 /* do local part */ 629 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 630 /* receive remote parts: note this assumes the values are not actually */ 631 /* inserted in yy until the next line, which is true for my implementation*/ 632 /* but is not perhaps always true. */ 633 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 634 PetscFunctionReturn(0); 635 } 636 637 EXTERN_C_BEGIN 638 #undef __FUNCT__ 639 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 640 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth tol,PetscTruth *f) 641 { 642 MPI_Comm comm; 643 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 644 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 645 IS Me,Notme; 646 PetscErrorCode ierr; 647 PetscInt M,N,first,last,*notme,i; 648 PetscMPIInt size; 649 650 PetscFunctionBegin; 651 652 /* Easy test: symmetric diagonal block */ 653 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 654 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 655 if (!*f) PetscFunctionReturn(0); 656 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 657 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 658 if (size == 1) PetscFunctionReturn(0); 659 660 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 661 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 662 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 663 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 664 for (i=0; i<first; i++) notme[i] = i; 665 for (i=last; i<M; i++) notme[i-last+first] = i; 666 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 667 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 668 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 669 Aoff = Aoffs[0]; 670 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 671 Boff = Boffs[0]; 672 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 673 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 674 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 675 ierr = ISDestroy(Me);CHKERRQ(ierr); 676 ierr = ISDestroy(Notme);CHKERRQ(ierr); 677 678 PetscFunctionReturn(0); 679 } 680 EXTERN_C_END 681 682 #undef __FUNCT__ 683 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 684 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 685 { 686 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 687 PetscErrorCode ierr; 688 689 PetscFunctionBegin; 690 /* do nondiagonal part */ 691 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 692 /* send it on its way */ 693 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 694 /* do local part */ 695 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 696 /* receive remote parts: note this assumes the values are not actually */ 697 /* inserted in yy until the next line, which is true for my implementation*/ 698 /* but is not perhaps always true. */ 699 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 700 PetscFunctionReturn(0); 701 } 702 703 /* 704 This only works correctly for square matrices where the subblock A->A is the 705 diagonal block 706 */ 707 #undef __FUNCT__ 708 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 709 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 710 { 711 PetscErrorCode ierr; 712 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 713 714 PetscFunctionBegin; 715 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 716 if (a->rstart != a->cstart || a->rend != a->cend) { 717 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 718 } 719 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 720 PetscFunctionReturn(0); 721 } 722 723 #undef __FUNCT__ 724 #define __FUNCT__ "MatScale_MPIAIJ" 725 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A) 726 { 727 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 728 PetscErrorCode ierr; 729 730 PetscFunctionBegin; 731 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 732 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 733 PetscFunctionReturn(0); 734 } 735 736 #undef __FUNCT__ 737 #define __FUNCT__ "MatDestroy_MPIAIJ" 738 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 739 { 740 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 741 PetscErrorCode ierr; 742 743 PetscFunctionBegin; 744 #if defined(PETSC_USE_LOG) 745 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N); 746 #endif 747 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 748 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 749 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 750 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 751 #if defined (PETSC_USE_CTABLE) 752 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 753 #else 754 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 755 #endif 756 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 757 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 758 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 759 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 760 ierr = PetscFree(aij);CHKERRQ(ierr); 761 762 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 763 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 764 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 765 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 766 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 767 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 768 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 769 PetscFunctionReturn(0); 770 } 771 772 #undef __FUNCT__ 773 #define __FUNCT__ "MatView_MPIAIJ_Binary" 774 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 775 { 776 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 777 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 778 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 779 PetscErrorCode ierr; 780 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 781 int fd; 782 PetscInt nz,header[4],*row_lengths,*range,rlen,i; 783 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 784 PetscScalar *column_values; 785 786 PetscFunctionBegin; 787 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 788 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 789 nz = A->nz + B->nz; 790 if (!rank) { 791 header[0] = MAT_FILE_COOKIE; 792 header[1] = mat->M; 793 header[2] = mat->N; 794 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 795 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 796 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 797 /* get largest number of rows any processor has */ 798 rlen = mat->m; 799 ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr); 800 for (i=1; i<size; i++) { 801 rlen = PetscMax(rlen,range[i+1] - range[i]); 802 } 803 } else { 804 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 805 rlen = mat->m; 806 } 807 808 /* load up the local row counts */ 809 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 810 for (i=0; i<mat->m; i++) { 811 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 812 } 813 814 /* store the row lengths to the file */ 815 if (!rank) { 816 MPI_Status status; 817 ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 818 for (i=1; i<size; i++) { 819 rlen = range[i+1] - range[i]; 820 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 821 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 822 } 823 } else { 824 ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 825 } 826 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 827 828 /* load up the local column indices */ 829 nzmax = nz; /* )th processor needs space a largest processor needs */ 830 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 831 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 832 cnt = 0; 833 for (i=0; i<mat->m; i++) { 834 for (j=B->i[i]; j<B->i[i+1]; j++) { 835 if ( (col = garray[B->j[j]]) > cstart) break; 836 column_indices[cnt++] = col; 837 } 838 for (k=A->i[i]; k<A->i[i+1]; k++) { 839 column_indices[cnt++] = A->j[k] + cstart; 840 } 841 for (; j<B->i[i+1]; j++) { 842 column_indices[cnt++] = garray[B->j[j]]; 843 } 844 } 845 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 846 847 /* store the column indices to the file */ 848 if (!rank) { 849 MPI_Status status; 850 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 851 for (i=1; i<size; i++) { 852 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 853 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 854 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 855 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 856 } 857 } else { 858 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 859 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 860 } 861 ierr = PetscFree(column_indices);CHKERRQ(ierr); 862 863 /* load up the local column values */ 864 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 865 cnt = 0; 866 for (i=0; i<mat->m; i++) { 867 for (j=B->i[i]; j<B->i[i+1]; j++) { 868 if ( garray[B->j[j]] > cstart) break; 869 column_values[cnt++] = B->a[j]; 870 } 871 for (k=A->i[i]; k<A->i[i+1]; k++) { 872 column_values[cnt++] = A->a[k]; 873 } 874 for (; j<B->i[i+1]; j++) { 875 column_values[cnt++] = B->a[j]; 876 } 877 } 878 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 879 880 /* store the column values to the file */ 881 if (!rank) { 882 MPI_Status status; 883 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 884 for (i=1; i<size; i++) { 885 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 886 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 887 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 888 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 889 } 890 } else { 891 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 892 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 893 } 894 ierr = PetscFree(column_values);CHKERRQ(ierr); 895 PetscFunctionReturn(0); 896 } 897 898 #undef __FUNCT__ 899 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 900 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 901 { 902 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 903 PetscErrorCode ierr; 904 PetscMPIInt rank = aij->rank,size = aij->size; 905 PetscTruth isdraw,iascii,flg,isbinary; 906 PetscViewer sviewer; 907 PetscViewerFormat format; 908 909 PetscFunctionBegin; 910 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 911 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 912 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 913 if (iascii) { 914 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 915 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 916 MatInfo info; 917 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 918 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 919 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 920 if (flg) { 921 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 922 rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 923 } else { 924 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 925 rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 926 } 927 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 928 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 929 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 930 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 931 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 932 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 933 PetscFunctionReturn(0); 934 } else if (format == PETSC_VIEWER_ASCII_INFO) { 935 PetscFunctionReturn(0); 936 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 937 PetscFunctionReturn(0); 938 } 939 } else if (isbinary) { 940 if (size == 1) { 941 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 942 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 943 } else { 944 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 945 } 946 PetscFunctionReturn(0); 947 } else if (isdraw) { 948 PetscDraw draw; 949 PetscTruth isnull; 950 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 951 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 952 } 953 954 if (size == 1) { 955 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 956 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 957 } else { 958 /* assemble the entire matrix onto first processor. */ 959 Mat A; 960 Mat_SeqAIJ *Aloc; 961 PetscInt M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 962 PetscScalar *a; 963 964 if (!rank) { 965 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 966 } else { 967 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 968 } 969 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 970 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 971 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 972 PetscLogObjectParent(mat,A); 973 974 /* copy over the A part */ 975 Aloc = (Mat_SeqAIJ*)aij->A->data; 976 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 977 row = aij->rstart; 978 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 979 for (i=0; i<m; i++) { 980 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 981 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 982 } 983 aj = Aloc->j; 984 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 985 986 /* copy over the B part */ 987 Aloc = (Mat_SeqAIJ*)aij->B->data; 988 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 989 row = aij->rstart; 990 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 991 ct = cols; 992 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 993 for (i=0; i<m; i++) { 994 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 995 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 996 } 997 ierr = PetscFree(ct);CHKERRQ(ierr); 998 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 999 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1000 /* 1001 Everyone has to call to draw the matrix since the graphics waits are 1002 synchronized across all processors that share the PetscDraw object 1003 */ 1004 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1005 if (!rank) { 1006 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 1007 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1008 } 1009 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1010 ierr = MatDestroy(A);CHKERRQ(ierr); 1011 } 1012 PetscFunctionReturn(0); 1013 } 1014 1015 #undef __FUNCT__ 1016 #define __FUNCT__ "MatView_MPIAIJ" 1017 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1018 { 1019 PetscErrorCode ierr; 1020 PetscTruth iascii,isdraw,issocket,isbinary; 1021 1022 PetscFunctionBegin; 1023 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1024 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1025 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1026 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1027 if (iascii || isdraw || isbinary || issocket) { 1028 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1029 } else { 1030 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1031 } 1032 PetscFunctionReturn(0); 1033 } 1034 1035 1036 1037 #undef __FUNCT__ 1038 #define __FUNCT__ "MatRelax_MPIAIJ" 1039 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1040 { 1041 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1042 PetscErrorCode ierr; 1043 Vec bb1; 1044 PetscScalar mone=-1.0; 1045 1046 PetscFunctionBegin; 1047 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1048 1049 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1050 1051 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1052 if (flag & SOR_ZERO_INITIAL_GUESS) { 1053 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1054 its--; 1055 } 1056 1057 while (its--) { 1058 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1059 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1060 1061 /* update rhs: bb1 = bb - B*x */ 1062 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1063 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1064 1065 /* local sweep */ 1066 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1067 CHKERRQ(ierr); 1068 } 1069 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1070 if (flag & SOR_ZERO_INITIAL_GUESS) { 1071 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1072 its--; 1073 } 1074 while (its--) { 1075 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1076 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1077 1078 /* update rhs: bb1 = bb - B*x */ 1079 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1080 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1081 1082 /* local sweep */ 1083 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1084 CHKERRQ(ierr); 1085 } 1086 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1087 if (flag & SOR_ZERO_INITIAL_GUESS) { 1088 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1089 its--; 1090 } 1091 while (its--) { 1092 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1093 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1094 1095 /* update rhs: bb1 = bb - B*x */ 1096 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1097 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1098 1099 /* local sweep */ 1100 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1101 CHKERRQ(ierr); 1102 } 1103 } else { 1104 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1105 } 1106 1107 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1108 PetscFunctionReturn(0); 1109 } 1110 1111 #undef __FUNCT__ 1112 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1113 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1114 { 1115 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1116 Mat A = mat->A,B = mat->B; 1117 PetscErrorCode ierr; 1118 PetscReal isend[5],irecv[5]; 1119 1120 PetscFunctionBegin; 1121 info->block_size = 1.0; 1122 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1123 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1124 isend[3] = info->memory; isend[4] = info->mallocs; 1125 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1126 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1127 isend[3] += info->memory; isend[4] += info->mallocs; 1128 if (flag == MAT_LOCAL) { 1129 info->nz_used = isend[0]; 1130 info->nz_allocated = isend[1]; 1131 info->nz_unneeded = isend[2]; 1132 info->memory = isend[3]; 1133 info->mallocs = isend[4]; 1134 } else if (flag == MAT_GLOBAL_MAX) { 1135 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1136 info->nz_used = irecv[0]; 1137 info->nz_allocated = irecv[1]; 1138 info->nz_unneeded = irecv[2]; 1139 info->memory = irecv[3]; 1140 info->mallocs = irecv[4]; 1141 } else if (flag == MAT_GLOBAL_SUM) { 1142 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1143 info->nz_used = irecv[0]; 1144 info->nz_allocated = irecv[1]; 1145 info->nz_unneeded = irecv[2]; 1146 info->memory = irecv[3]; 1147 info->mallocs = irecv[4]; 1148 } 1149 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1150 info->fill_ratio_needed = 0; 1151 info->factor_mallocs = 0; 1152 info->rows_global = (double)matin->M; 1153 info->columns_global = (double)matin->N; 1154 info->rows_local = (double)matin->m; 1155 info->columns_local = (double)matin->N; 1156 1157 PetscFunctionReturn(0); 1158 } 1159 1160 #undef __FUNCT__ 1161 #define __FUNCT__ "MatSetOption_MPIAIJ" 1162 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op) 1163 { 1164 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1165 PetscErrorCode ierr; 1166 1167 PetscFunctionBegin; 1168 switch (op) { 1169 case MAT_NO_NEW_NONZERO_LOCATIONS: 1170 case MAT_YES_NEW_NONZERO_LOCATIONS: 1171 case MAT_COLUMNS_UNSORTED: 1172 case MAT_COLUMNS_SORTED: 1173 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1174 case MAT_KEEP_ZEROED_ROWS: 1175 case MAT_NEW_NONZERO_LOCATION_ERR: 1176 case MAT_USE_INODES: 1177 case MAT_DO_NOT_USE_INODES: 1178 case MAT_IGNORE_ZERO_ENTRIES: 1179 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1180 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1181 break; 1182 case MAT_ROW_ORIENTED: 1183 a->roworiented = PETSC_TRUE; 1184 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1185 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1186 break; 1187 case MAT_ROWS_SORTED: 1188 case MAT_ROWS_UNSORTED: 1189 case MAT_YES_NEW_DIAGONALS: 1190 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1191 break; 1192 case MAT_COLUMN_ORIENTED: 1193 a->roworiented = PETSC_FALSE; 1194 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1195 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1196 break; 1197 case MAT_IGNORE_OFF_PROC_ENTRIES: 1198 a->donotstash = PETSC_TRUE; 1199 break; 1200 case MAT_NO_NEW_DIAGONALS: 1201 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1202 case MAT_SYMMETRIC: 1203 case MAT_STRUCTURALLY_SYMMETRIC: 1204 case MAT_HERMITIAN: 1205 case MAT_SYMMETRY_ETERNAL: 1206 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1207 break; 1208 case MAT_NOT_SYMMETRIC: 1209 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1210 case MAT_NOT_HERMITIAN: 1211 case MAT_NOT_SYMMETRY_ETERNAL: 1212 break; 1213 default: 1214 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1215 } 1216 PetscFunctionReturn(0); 1217 } 1218 1219 #undef __FUNCT__ 1220 #define __FUNCT__ "MatGetRow_MPIAIJ" 1221 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1222 { 1223 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1224 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1225 PetscErrorCode ierr; 1226 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1227 PetscInt nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1228 PetscInt *cmap,*idx_p; 1229 1230 PetscFunctionBegin; 1231 if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1232 mat->getrowactive = PETSC_TRUE; 1233 1234 if (!mat->rowvalues && (idx || v)) { 1235 /* 1236 allocate enough space to hold information from the longest row. 1237 */ 1238 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1239 PetscInt max = 1,tmp; 1240 for (i=0; i<matin->m; i++) { 1241 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1242 if (max < tmp) { max = tmp; } 1243 } 1244 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1245 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1246 } 1247 1248 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1249 lrow = row - rstart; 1250 1251 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1252 if (!v) {pvA = 0; pvB = 0;} 1253 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1254 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1255 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1256 nztot = nzA + nzB; 1257 1258 cmap = mat->garray; 1259 if (v || idx) { 1260 if (nztot) { 1261 /* Sort by increasing column numbers, assuming A and B already sorted */ 1262 PetscInt imark = -1; 1263 if (v) { 1264 *v = v_p = mat->rowvalues; 1265 for (i=0; i<nzB; i++) { 1266 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1267 else break; 1268 } 1269 imark = i; 1270 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1271 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1272 } 1273 if (idx) { 1274 *idx = idx_p = mat->rowindices; 1275 if (imark > -1) { 1276 for (i=0; i<imark; i++) { 1277 idx_p[i] = cmap[cworkB[i]]; 1278 } 1279 } else { 1280 for (i=0; i<nzB; i++) { 1281 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1282 else break; 1283 } 1284 imark = i; 1285 } 1286 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1287 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1288 } 1289 } else { 1290 if (idx) *idx = 0; 1291 if (v) *v = 0; 1292 } 1293 } 1294 *nz = nztot; 1295 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1296 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1297 PetscFunctionReturn(0); 1298 } 1299 1300 #undef __FUNCT__ 1301 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1302 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1303 { 1304 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1305 1306 PetscFunctionBegin; 1307 if (!aij->getrowactive) { 1308 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1309 } 1310 aij->getrowactive = PETSC_FALSE; 1311 PetscFunctionReturn(0); 1312 } 1313 1314 #undef __FUNCT__ 1315 #define __FUNCT__ "MatNorm_MPIAIJ" 1316 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1317 { 1318 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1319 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1320 PetscErrorCode ierr; 1321 PetscInt i,j,cstart = aij->cstart; 1322 PetscReal sum = 0.0; 1323 PetscScalar *v; 1324 1325 PetscFunctionBegin; 1326 if (aij->size == 1) { 1327 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1328 } else { 1329 if (type == NORM_FROBENIUS) { 1330 v = amat->a; 1331 for (i=0; i<amat->nz; i++) { 1332 #if defined(PETSC_USE_COMPLEX) 1333 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1334 #else 1335 sum += (*v)*(*v); v++; 1336 #endif 1337 } 1338 v = bmat->a; 1339 for (i=0; i<bmat->nz; i++) { 1340 #if defined(PETSC_USE_COMPLEX) 1341 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1342 #else 1343 sum += (*v)*(*v); v++; 1344 #endif 1345 } 1346 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1347 *norm = sqrt(*norm); 1348 } else if (type == NORM_1) { /* max column norm */ 1349 PetscReal *tmp,*tmp2; 1350 PetscInt *jj,*garray = aij->garray; 1351 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1352 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1353 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1354 *norm = 0.0; 1355 v = amat->a; jj = amat->j; 1356 for (j=0; j<amat->nz; j++) { 1357 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1358 } 1359 v = bmat->a; jj = bmat->j; 1360 for (j=0; j<bmat->nz; j++) { 1361 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1362 } 1363 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1364 for (j=0; j<mat->N; j++) { 1365 if (tmp2[j] > *norm) *norm = tmp2[j]; 1366 } 1367 ierr = PetscFree(tmp);CHKERRQ(ierr); 1368 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1369 } else if (type == NORM_INFINITY) { /* max row norm */ 1370 PetscReal ntemp = 0.0; 1371 for (j=0; j<aij->A->m; j++) { 1372 v = amat->a + amat->i[j]; 1373 sum = 0.0; 1374 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1375 sum += PetscAbsScalar(*v); v++; 1376 } 1377 v = bmat->a + bmat->i[j]; 1378 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1379 sum += PetscAbsScalar(*v); v++; 1380 } 1381 if (sum > ntemp) ntemp = sum; 1382 } 1383 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1384 } else { 1385 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1386 } 1387 } 1388 PetscFunctionReturn(0); 1389 } 1390 1391 #undef __FUNCT__ 1392 #define __FUNCT__ "MatTranspose_MPIAIJ" 1393 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1394 { 1395 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1396 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1397 PetscErrorCode ierr; 1398 PetscInt M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1399 Mat B; 1400 PetscScalar *array; 1401 1402 PetscFunctionBegin; 1403 if (!matout && M != N) { 1404 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1405 } 1406 1407 ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr); 1408 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1409 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1410 1411 /* copy over the A part */ 1412 Aloc = (Mat_SeqAIJ*)a->A->data; 1413 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1414 row = a->rstart; 1415 for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;} 1416 for (i=0; i<m; i++) { 1417 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1418 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1419 } 1420 aj = Aloc->j; 1421 for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;} 1422 1423 /* copy over the B part */ 1424 Aloc = (Mat_SeqAIJ*)a->B->data; 1425 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1426 row = a->rstart; 1427 ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1428 ct = cols; 1429 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1430 for (i=0; i<m; i++) { 1431 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1432 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1433 } 1434 ierr = PetscFree(ct);CHKERRQ(ierr); 1435 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1436 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1437 if (matout) { 1438 *matout = B; 1439 } else { 1440 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1441 } 1442 PetscFunctionReturn(0); 1443 } 1444 1445 #undef __FUNCT__ 1446 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1447 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1448 { 1449 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1450 Mat a = aij->A,b = aij->B; 1451 PetscErrorCode ierr; 1452 PetscInt s1,s2,s3; 1453 1454 PetscFunctionBegin; 1455 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1456 if (rr) { 1457 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1458 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1459 /* Overlap communication with computation. */ 1460 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1461 } 1462 if (ll) { 1463 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1464 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1465 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1466 } 1467 /* scale the diagonal block */ 1468 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1469 1470 if (rr) { 1471 /* Do a scatter end and then right scale the off-diagonal block */ 1472 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1473 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1474 } 1475 1476 PetscFunctionReturn(0); 1477 } 1478 1479 1480 #undef __FUNCT__ 1481 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1482 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A) 1483 { 1484 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1485 PetscErrorCode ierr; 1486 1487 PetscFunctionBegin; 1488 if (!a->rank) { 1489 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1490 } 1491 PetscFunctionReturn(0); 1492 } 1493 1494 #undef __FUNCT__ 1495 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1496 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1497 { 1498 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1499 PetscErrorCode ierr; 1500 1501 PetscFunctionBegin; 1502 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1503 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1504 PetscFunctionReturn(0); 1505 } 1506 #undef __FUNCT__ 1507 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1508 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1509 { 1510 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1511 PetscErrorCode ierr; 1512 1513 PetscFunctionBegin; 1514 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1515 PetscFunctionReturn(0); 1516 } 1517 1518 #undef __FUNCT__ 1519 #define __FUNCT__ "MatEqual_MPIAIJ" 1520 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1521 { 1522 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1523 Mat a,b,c,d; 1524 PetscTruth flg; 1525 PetscErrorCode ierr; 1526 1527 PetscFunctionBegin; 1528 a = matA->A; b = matA->B; 1529 c = matB->A; d = matB->B; 1530 1531 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1532 if (flg) { 1533 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1534 } 1535 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1536 PetscFunctionReturn(0); 1537 } 1538 1539 #undef __FUNCT__ 1540 #define __FUNCT__ "MatCopy_MPIAIJ" 1541 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1542 { 1543 PetscErrorCode ierr; 1544 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1545 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1546 1547 PetscFunctionBegin; 1548 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1549 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1550 /* because of the column compression in the off-processor part of the matrix a->B, 1551 the number of columns in a->B and b->B may be different, hence we cannot call 1552 the MatCopy() directly on the two parts. If need be, we can provide a more 1553 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1554 then copying the submatrices */ 1555 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1556 } else { 1557 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1558 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1559 } 1560 PetscFunctionReturn(0); 1561 } 1562 1563 #undef __FUNCT__ 1564 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1565 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1566 { 1567 PetscErrorCode ierr; 1568 1569 PetscFunctionBegin; 1570 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1571 PetscFunctionReturn(0); 1572 } 1573 1574 #include "petscblaslapack.h" 1575 #undef __FUNCT__ 1576 #define __FUNCT__ "MatAXPY_MPIAIJ" 1577 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str) 1578 { 1579 PetscErrorCode ierr; 1580 PetscInt i; 1581 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1582 PetscBLASInt bnz,one=1; 1583 Mat_SeqAIJ *x,*y; 1584 1585 PetscFunctionBegin; 1586 if (str == SAME_NONZERO_PATTERN) { 1587 x = (Mat_SeqAIJ *)xx->A->data; 1588 y = (Mat_SeqAIJ *)yy->A->data; 1589 bnz = (PetscBLASInt)x->nz; 1590 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1591 x = (Mat_SeqAIJ *)xx->B->data; 1592 y = (Mat_SeqAIJ *)yy->B->data; 1593 bnz = (PetscBLASInt)x->nz; 1594 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1595 } else if (str == SUBSET_NONZERO_PATTERN) { 1596 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1597 1598 x = (Mat_SeqAIJ *)xx->B->data; 1599 y = (Mat_SeqAIJ *)yy->B->data; 1600 if (y->xtoy && y->XtoY != xx->B) { 1601 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1602 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1603 } 1604 if (!y->xtoy) { /* get xtoy */ 1605 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1606 y->XtoY = xx->B; 1607 } 1608 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1609 } else { 1610 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1611 } 1612 PetscFunctionReturn(0); 1613 } 1614 1615 /* -------------------------------------------------------------------*/ 1616 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1617 MatGetRow_MPIAIJ, 1618 MatRestoreRow_MPIAIJ, 1619 MatMult_MPIAIJ, 1620 /* 4*/ MatMultAdd_MPIAIJ, 1621 MatMultTranspose_MPIAIJ, 1622 MatMultTransposeAdd_MPIAIJ, 1623 0, 1624 0, 1625 0, 1626 /*10*/ 0, 1627 0, 1628 0, 1629 MatRelax_MPIAIJ, 1630 MatTranspose_MPIAIJ, 1631 /*15*/ MatGetInfo_MPIAIJ, 1632 MatEqual_MPIAIJ, 1633 MatGetDiagonal_MPIAIJ, 1634 MatDiagonalScale_MPIAIJ, 1635 MatNorm_MPIAIJ, 1636 /*20*/ MatAssemblyBegin_MPIAIJ, 1637 MatAssemblyEnd_MPIAIJ, 1638 0, 1639 MatSetOption_MPIAIJ, 1640 MatZeroEntries_MPIAIJ, 1641 /*25*/ MatZeroRows_MPIAIJ, 1642 0, 1643 0, 1644 0, 1645 0, 1646 /*30*/ MatSetUpPreallocation_MPIAIJ, 1647 0, 1648 0, 1649 0, 1650 0, 1651 /*35*/ MatDuplicate_MPIAIJ, 1652 0, 1653 0, 1654 0, 1655 0, 1656 /*40*/ MatAXPY_MPIAIJ, 1657 MatGetSubMatrices_MPIAIJ, 1658 MatIncreaseOverlap_MPIAIJ, 1659 MatGetValues_MPIAIJ, 1660 MatCopy_MPIAIJ, 1661 /*45*/ MatPrintHelp_MPIAIJ, 1662 MatScale_MPIAIJ, 1663 0, 1664 0, 1665 0, 1666 /*50*/ MatSetBlockSize_MPIAIJ, 1667 0, 1668 0, 1669 0, 1670 0, 1671 /*55*/ MatFDColoringCreate_MPIAIJ, 1672 0, 1673 MatSetUnfactored_MPIAIJ, 1674 0, 1675 0, 1676 /*60*/ MatGetSubMatrix_MPIAIJ, 1677 MatDestroy_MPIAIJ, 1678 MatView_MPIAIJ, 1679 MatGetPetscMaps_Petsc, 1680 0, 1681 /*65*/ 0, 1682 0, 1683 0, 1684 0, 1685 0, 1686 /*70*/ 0, 1687 0, 1688 MatSetColoring_MPIAIJ, 1689 #if defined(PETSC_HAVE_ADIC) 1690 MatSetValuesAdic_MPIAIJ, 1691 #else 1692 0, 1693 #endif 1694 MatSetValuesAdifor_MPIAIJ, 1695 /*75*/ 0, 1696 0, 1697 0, 1698 0, 1699 0, 1700 /*80*/ 0, 1701 0, 1702 0, 1703 0, 1704 /*84*/ MatLoad_MPIAIJ, 1705 0, 1706 0, 1707 0, 1708 0, 1709 0, 1710 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1711 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1712 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1713 MatPtAP_MPIAIJ_MPIAIJ, 1714 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1715 /*95*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 1716 0, 1717 0, 1718 0}; 1719 1720 /* ----------------------------------------------------------------------------------------*/ 1721 1722 EXTERN_C_BEGIN 1723 #undef __FUNCT__ 1724 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1725 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 1726 { 1727 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1728 PetscErrorCode ierr; 1729 1730 PetscFunctionBegin; 1731 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1732 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1733 PetscFunctionReturn(0); 1734 } 1735 EXTERN_C_END 1736 1737 EXTERN_C_BEGIN 1738 #undef __FUNCT__ 1739 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1740 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 1741 { 1742 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1743 PetscErrorCode ierr; 1744 1745 PetscFunctionBegin; 1746 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1747 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1748 PetscFunctionReturn(0); 1749 } 1750 EXTERN_C_END 1751 1752 #include "petscpc.h" 1753 EXTERN_C_BEGIN 1754 #undef __FUNCT__ 1755 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1756 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1757 { 1758 Mat_MPIAIJ *b; 1759 PetscErrorCode ierr; 1760 PetscInt i; 1761 1762 PetscFunctionBegin; 1763 B->preallocated = PETSC_TRUE; 1764 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1765 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1766 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1767 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1768 if (d_nnz) { 1769 for (i=0; i<B->m; i++) { 1770 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1771 } 1772 } 1773 if (o_nnz) { 1774 for (i=0; i<B->m; i++) { 1775 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1776 } 1777 } 1778 b = (Mat_MPIAIJ*)B->data; 1779 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1780 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1781 1782 PetscFunctionReturn(0); 1783 } 1784 EXTERN_C_END 1785 1786 /*MC 1787 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 1788 1789 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 1790 and MATMPIAIJ otherwise. As a result, for single process communicators, 1791 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 1792 for communicators controlling multiple processes. It is recommended that you call both of 1793 the above preallocation routines for simplicity. 1794 1795 Options Database Keys: 1796 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 1797 1798 Level: beginner 1799 1800 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ 1801 M*/ 1802 1803 EXTERN_C_BEGIN 1804 #undef __FUNCT__ 1805 #define __FUNCT__ "MatCreate_AIJ" 1806 PetscErrorCode MatCreate_AIJ(Mat A) 1807 { 1808 PetscErrorCode ierr; 1809 PetscMPIInt size; 1810 1811 PetscFunctionBegin; 1812 ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr); 1813 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1814 if (size == 1) { 1815 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 1816 } else { 1817 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1818 } 1819 PetscFunctionReturn(0); 1820 } 1821 EXTERN_C_END 1822 1823 #undef __FUNCT__ 1824 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1825 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1826 { 1827 Mat mat; 1828 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1829 PetscErrorCode ierr; 1830 1831 PetscFunctionBegin; 1832 *newmat = 0; 1833 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1834 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1835 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1836 a = (Mat_MPIAIJ*)mat->data; 1837 1838 mat->factor = matin->factor; 1839 mat->bs = matin->bs; 1840 mat->assembled = PETSC_TRUE; 1841 mat->insertmode = NOT_SET_VALUES; 1842 mat->preallocated = PETSC_TRUE; 1843 1844 a->rstart = oldmat->rstart; 1845 a->rend = oldmat->rend; 1846 a->cstart = oldmat->cstart; 1847 a->cend = oldmat->cend; 1848 a->size = oldmat->size; 1849 a->rank = oldmat->rank; 1850 a->donotstash = oldmat->donotstash; 1851 a->roworiented = oldmat->roworiented; 1852 a->rowindices = 0; 1853 a->rowvalues = 0; 1854 a->getrowactive = PETSC_FALSE; 1855 1856 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 1857 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1858 if (oldmat->colmap) { 1859 #if defined (PETSC_USE_CTABLE) 1860 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1861 #else 1862 ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1863 PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt)); 1864 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1865 #endif 1866 } else a->colmap = 0; 1867 if (oldmat->garray) { 1868 PetscInt len; 1869 len = oldmat->B->n; 1870 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1871 PetscLogObjectMemory(mat,len*sizeof(PetscInt)); 1872 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 1873 } else a->garray = 0; 1874 1875 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1876 PetscLogObjectParent(mat,a->lvec); 1877 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1878 PetscLogObjectParent(mat,a->Mvctx); 1879 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1880 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1881 PetscLogObjectParent(mat,a->A); 1882 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1883 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1884 PetscLogObjectParent(mat,a->B); 1885 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1886 *newmat = mat; 1887 PetscFunctionReturn(0); 1888 } 1889 1890 #include "petscsys.h" 1891 1892 #undef __FUNCT__ 1893 #define __FUNCT__ "MatLoad_MPIAIJ" 1894 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 1895 { 1896 Mat A; 1897 PetscScalar *vals,*svals; 1898 MPI_Comm comm = ((PetscObject)viewer)->comm; 1899 MPI_Status status; 1900 PetscErrorCode ierr; 1901 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 1902 PetscInt i,nz,j,rstart,rend; 1903 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 1904 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1905 PetscInt cend,cstart,n,*rowners; 1906 int fd; 1907 1908 PetscFunctionBegin; 1909 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1910 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1911 if (!rank) { 1912 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1913 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1914 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1915 } 1916 1917 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1918 M = header[1]; N = header[2]; 1919 /* determine ownership of all rows */ 1920 m = M/size + ((M % size) > rank); 1921 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1922 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1923 rowners[0] = 0; 1924 for (i=2; i<=size; i++) { 1925 rowners[i] += rowners[i-1]; 1926 } 1927 rstart = rowners[rank]; 1928 rend = rowners[rank+1]; 1929 1930 /* distribute row lengths to all processors */ 1931 ierr = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr); 1932 if (!rank) { 1933 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 1934 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1935 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 1936 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1937 for (j=0; j<m; j++) { 1938 procsnz[0] += ourlens[j]; 1939 } 1940 for (i=1; i<size; i++) { 1941 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 1942 /* calculate the number of nonzeros on each processor */ 1943 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 1944 procsnz[i] += rowlengths[j]; 1945 } 1946 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1947 } 1948 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1949 } else { 1950 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1951 } 1952 1953 if (!rank) { 1954 /* determine max buffer needed and allocate it */ 1955 maxnz = 0; 1956 for (i=0; i<size; i++) { 1957 maxnz = PetscMax(maxnz,procsnz[i]); 1958 } 1959 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1960 1961 /* read in my part of the matrix column indices */ 1962 nz = procsnz[0]; 1963 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1964 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1965 1966 /* read in every one elses and ship off */ 1967 for (i=1; i<size; i++) { 1968 nz = procsnz[i]; 1969 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1970 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1971 } 1972 ierr = PetscFree(cols);CHKERRQ(ierr); 1973 } else { 1974 /* determine buffer space needed for message */ 1975 nz = 0; 1976 for (i=0; i<m; i++) { 1977 nz += ourlens[i]; 1978 } 1979 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1980 1981 /* receive message of column indices*/ 1982 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1983 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1984 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1985 } 1986 1987 /* determine column ownership if matrix is not square */ 1988 if (N != M) { 1989 n = N/size + ((N % size) > rank); 1990 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1991 cstart = cend - n; 1992 } else { 1993 cstart = rstart; 1994 cend = rend; 1995 n = cend - cstart; 1996 } 1997 1998 /* loop over local rows, determining number of off diagonal entries */ 1999 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2000 jj = 0; 2001 for (i=0; i<m; i++) { 2002 for (j=0; j<ourlens[i]; j++) { 2003 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2004 jj++; 2005 } 2006 } 2007 2008 /* create our matrix */ 2009 for (i=0; i<m; i++) { 2010 ourlens[i] -= offlens[i]; 2011 } 2012 ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr); 2013 ierr = MatSetType(A,type);CHKERRQ(ierr); 2014 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2015 2016 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2017 for (i=0; i<m; i++) { 2018 ourlens[i] += offlens[i]; 2019 } 2020 2021 if (!rank) { 2022 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2023 2024 /* read in my part of the matrix numerical values */ 2025 nz = procsnz[0]; 2026 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2027 2028 /* insert into matrix */ 2029 jj = rstart; 2030 smycols = mycols; 2031 svals = vals; 2032 for (i=0; i<m; i++) { 2033 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2034 smycols += ourlens[i]; 2035 svals += ourlens[i]; 2036 jj++; 2037 } 2038 2039 /* read in other processors and ship out */ 2040 for (i=1; i<size; i++) { 2041 nz = procsnz[i]; 2042 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2043 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2044 } 2045 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2046 } else { 2047 /* receive numeric values */ 2048 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2049 2050 /* receive message of values*/ 2051 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2052 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2053 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2054 2055 /* insert into matrix */ 2056 jj = rstart; 2057 smycols = mycols; 2058 svals = vals; 2059 for (i=0; i<m; i++) { 2060 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2061 smycols += ourlens[i]; 2062 svals += ourlens[i]; 2063 jj++; 2064 } 2065 } 2066 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2067 ierr = PetscFree(vals);CHKERRQ(ierr); 2068 ierr = PetscFree(mycols);CHKERRQ(ierr); 2069 ierr = PetscFree(rowners);CHKERRQ(ierr); 2070 2071 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2072 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2073 *newmat = A; 2074 PetscFunctionReturn(0); 2075 } 2076 2077 #undef __FUNCT__ 2078 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2079 /* 2080 Not great since it makes two copies of the submatrix, first an SeqAIJ 2081 in local and then by concatenating the local matrices the end result. 2082 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2083 */ 2084 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2085 { 2086 PetscErrorCode ierr; 2087 PetscMPIInt rank,size; 2088 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2089 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2090 Mat *local,M,Mreuse; 2091 PetscScalar *vwork,*aa; 2092 MPI_Comm comm = mat->comm; 2093 Mat_SeqAIJ *aij; 2094 2095 2096 PetscFunctionBegin; 2097 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2098 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2099 2100 if (call == MAT_REUSE_MATRIX) { 2101 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2102 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2103 local = &Mreuse; 2104 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2105 } else { 2106 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2107 Mreuse = *local; 2108 ierr = PetscFree(local);CHKERRQ(ierr); 2109 } 2110 2111 /* 2112 m - number of local rows 2113 n - number of columns (same on all processors) 2114 rstart - first row in new global matrix generated 2115 */ 2116 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2117 if (call == MAT_INITIAL_MATRIX) { 2118 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2119 ii = aij->i; 2120 jj = aij->j; 2121 2122 /* 2123 Determine the number of non-zeros in the diagonal and off-diagonal 2124 portions of the matrix in order to do correct preallocation 2125 */ 2126 2127 /* first get start and end of "diagonal" columns */ 2128 if (csize == PETSC_DECIDE) { 2129 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2130 if (mglobal == n) { /* square matrix */ 2131 nlocal = m; 2132 } else { 2133 nlocal = n/size + ((n % size) > rank); 2134 } 2135 } else { 2136 nlocal = csize; 2137 } 2138 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2139 rstart = rend - nlocal; 2140 if (rank == size - 1 && rend != n) { 2141 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2142 } 2143 2144 /* next, compute all the lengths */ 2145 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2146 olens = dlens + m; 2147 for (i=0; i<m; i++) { 2148 jend = ii[i+1] - ii[i]; 2149 olen = 0; 2150 dlen = 0; 2151 for (j=0; j<jend; j++) { 2152 if (*jj < rstart || *jj >= rend) olen++; 2153 else dlen++; 2154 jj++; 2155 } 2156 olens[i] = olen; 2157 dlens[i] = dlen; 2158 } 2159 ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr); 2160 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2161 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2162 ierr = PetscFree(dlens);CHKERRQ(ierr); 2163 } else { 2164 PetscInt ml,nl; 2165 2166 M = *newmat; 2167 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2168 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2169 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2170 /* 2171 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2172 rather than the slower MatSetValues(). 2173 */ 2174 M->was_assembled = PETSC_TRUE; 2175 M->assembled = PETSC_FALSE; 2176 } 2177 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2178 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2179 ii = aij->i; 2180 jj = aij->j; 2181 aa = aij->a; 2182 for (i=0; i<m; i++) { 2183 row = rstart + i; 2184 nz = ii[i+1] - ii[i]; 2185 cwork = jj; jj += nz; 2186 vwork = aa; aa += nz; 2187 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2188 } 2189 2190 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2191 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2192 *newmat = M; 2193 2194 /* save submatrix used in processor for next request */ 2195 if (call == MAT_INITIAL_MATRIX) { 2196 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2197 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2198 } 2199 2200 PetscFunctionReturn(0); 2201 } 2202 2203 EXTERN_C_BEGIN 2204 #undef __FUNCT__ 2205 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2206 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2207 { 2208 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2209 PetscInt m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d; 2210 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii; 2211 const PetscInt *JJ; 2212 PetscScalar *values; 2213 PetscErrorCode ierr; 2214 2215 PetscFunctionBegin; 2216 #if defined(PETSC_OPT_g) 2217 if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]); 2218 #endif 2219 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2220 o_nnz = d_nnz + m; 2221 2222 for (i=0; i<m; i++) { 2223 nnz = I[i+1]- I[i]; 2224 JJ = J + I[i]; 2225 nnz_max = PetscMax(nnz_max,nnz); 2226 #if defined(PETSC_OPT_g) 2227 if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 2228 #endif 2229 for (j=0; j<nnz; j++) { 2230 if (*JJ >= cstart) break; 2231 JJ++; 2232 } 2233 d = 0; 2234 for (; j<nnz; j++) { 2235 if (*JJ++ >= cend) break; 2236 d++; 2237 } 2238 d_nnz[i] = d; 2239 o_nnz[i] = nnz - d; 2240 } 2241 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2242 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2243 2244 if (v) values = (PetscScalar*)v; 2245 else { 2246 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2247 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2248 } 2249 2250 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2251 for (i=0; i<m; i++) { 2252 ii = i + rstart; 2253 nnz = I[i+1]- I[i]; 2254 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr); 2255 } 2256 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2257 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2258 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2259 2260 if (!v) { 2261 ierr = PetscFree(values);CHKERRQ(ierr); 2262 } 2263 PetscFunctionReturn(0); 2264 } 2265 EXTERN_C_END 2266 2267 #undef __FUNCT__ 2268 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2269 /*@C 2270 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2271 (the default parallel PETSc format). 2272 2273 Collective on MPI_Comm 2274 2275 Input Parameters: 2276 + A - the matrix 2277 . i - the indices into j for the start of each local row (starts with zero) 2278 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2279 - v - optional values in the matrix 2280 2281 Level: developer 2282 2283 .keywords: matrix, aij, compressed row, sparse, parallel 2284 2285 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2286 @*/ 2287 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2288 { 2289 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2290 2291 PetscFunctionBegin; 2292 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2293 if (f) { 2294 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2295 } 2296 PetscFunctionReturn(0); 2297 } 2298 2299 #undef __FUNCT__ 2300 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2301 /*@C 2302 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2303 (the default parallel PETSc format). For good matrix assembly performance 2304 the user should preallocate the matrix storage by setting the parameters 2305 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2306 performance can be increased by more than a factor of 50. 2307 2308 Collective on MPI_Comm 2309 2310 Input Parameters: 2311 + A - the matrix 2312 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2313 (same value is used for all local rows) 2314 . d_nnz - array containing the number of nonzeros in the various rows of the 2315 DIAGONAL portion of the local submatrix (possibly different for each row) 2316 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2317 The size of this array is equal to the number of local rows, i.e 'm'. 2318 You must leave room for the diagonal entry even if it is zero. 2319 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2320 submatrix (same value is used for all local rows). 2321 - o_nnz - array containing the number of nonzeros in the various rows of the 2322 OFF-DIAGONAL portion of the local submatrix (possibly different for 2323 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2324 structure. The size of this array is equal to the number 2325 of local rows, i.e 'm'. 2326 2327 If the *_nnz parameter is given then the *_nz parameter is ignored 2328 2329 The AIJ format (also called the Yale sparse matrix format or 2330 compressed row storage (CSR)), is fully compatible with standard Fortran 77 2331 storage. The stored row and column indices begin with zero. See the users manual for details. 2332 2333 The parallel matrix is partitioned such that the first m0 rows belong to 2334 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2335 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2336 2337 The DIAGONAL portion of the local submatrix of a processor can be defined 2338 as the submatrix which is obtained by extraction the part corresponding 2339 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2340 first row that belongs to the processor, and r2 is the last row belonging 2341 to the this processor. This is a square mxm matrix. The remaining portion 2342 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2343 2344 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2345 2346 Example usage: 2347 2348 Consider the following 8x8 matrix with 34 non-zero values, that is 2349 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2350 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2351 as follows: 2352 2353 .vb 2354 1 2 0 | 0 3 0 | 0 4 2355 Proc0 0 5 6 | 7 0 0 | 8 0 2356 9 0 10 | 11 0 0 | 12 0 2357 ------------------------------------- 2358 13 0 14 | 15 16 17 | 0 0 2359 Proc1 0 18 0 | 19 20 21 | 0 0 2360 0 0 0 | 22 23 0 | 24 0 2361 ------------------------------------- 2362 Proc2 25 26 27 | 0 0 28 | 29 0 2363 30 0 0 | 31 32 33 | 0 34 2364 .ve 2365 2366 This can be represented as a collection of submatrices as: 2367 2368 .vb 2369 A B C 2370 D E F 2371 G H I 2372 .ve 2373 2374 Where the submatrices A,B,C are owned by proc0, D,E,F are 2375 owned by proc1, G,H,I are owned by proc2. 2376 2377 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2378 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2379 The 'M','N' parameters are 8,8, and have the same values on all procs. 2380 2381 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2382 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2383 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2384 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2385 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2386 matrix, ans [DF] as another SeqAIJ matrix. 2387 2388 When d_nz, o_nz parameters are specified, d_nz storage elements are 2389 allocated for every row of the local diagonal submatrix, and o_nz 2390 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2391 One way to choose d_nz and o_nz is to use the max nonzerors per local 2392 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2393 In this case, the values of d_nz,o_nz are: 2394 .vb 2395 proc0 : dnz = 2, o_nz = 2 2396 proc1 : dnz = 3, o_nz = 2 2397 proc2 : dnz = 1, o_nz = 4 2398 .ve 2399 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2400 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2401 for proc3. i.e we are using 12+15+10=37 storage locations to store 2402 34 values. 2403 2404 When d_nnz, o_nnz parameters are specified, the storage is specified 2405 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2406 In the above case the values for d_nnz,o_nnz are: 2407 .vb 2408 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2409 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2410 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2411 .ve 2412 Here the space allocated is sum of all the above values i.e 34, and 2413 hence pre-allocation is perfect. 2414 2415 Level: intermediate 2416 2417 .keywords: matrix, aij, compressed row, sparse, parallel 2418 2419 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 2420 MPIAIJ 2421 @*/ 2422 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2423 { 2424 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2425 2426 PetscFunctionBegin; 2427 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2428 if (f) { 2429 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2430 } 2431 PetscFunctionReturn(0); 2432 } 2433 2434 #undef __FUNCT__ 2435 #define __FUNCT__ "MatCreateMPIAIJ" 2436 /*@C 2437 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2438 (the default parallel PETSc format). For good matrix assembly performance 2439 the user should preallocate the matrix storage by setting the parameters 2440 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2441 performance can be increased by more than a factor of 50. 2442 2443 Collective on MPI_Comm 2444 2445 Input Parameters: 2446 + comm - MPI communicator 2447 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2448 This value should be the same as the local size used in creating the 2449 y vector for the matrix-vector product y = Ax. 2450 . n - This value should be the same as the local size used in creating the 2451 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2452 calculated if N is given) For square matrices n is almost always m. 2453 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2454 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2455 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2456 (same value is used for all local rows) 2457 . d_nnz - array containing the number of nonzeros in the various rows of the 2458 DIAGONAL portion of the local submatrix (possibly different for each row) 2459 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2460 The size of this array is equal to the number of local rows, i.e 'm'. 2461 You must leave room for the diagonal entry even if it is zero. 2462 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2463 submatrix (same value is used for all local rows). 2464 - o_nnz - array containing the number of nonzeros in the various rows of the 2465 OFF-DIAGONAL portion of the local submatrix (possibly different for 2466 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2467 structure. The size of this array is equal to the number 2468 of local rows, i.e 'm'. 2469 2470 Output Parameter: 2471 . A - the matrix 2472 2473 Notes: 2474 If the *_nnz parameter is given then the *_nz parameter is ignored 2475 2476 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2477 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2478 storage requirements for this matrix. 2479 2480 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2481 processor than it must be used on all processors that share the object for 2482 that argument. 2483 2484 The user MUST specify either the local or global matrix dimensions 2485 (possibly both). 2486 2487 The parallel matrix is partitioned such that the first m0 rows belong to 2488 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2489 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2490 2491 The DIAGONAL portion of the local submatrix of a processor can be defined 2492 as the submatrix which is obtained by extraction the part corresponding 2493 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2494 first row that belongs to the processor, and r2 is the last row belonging 2495 to the this processor. This is a square mxm matrix. The remaining portion 2496 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2497 2498 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2499 2500 When calling this routine with a single process communicator, a matrix of 2501 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2502 type of communicator, use the construction mechanism: 2503 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 2504 2505 By default, this format uses inodes (identical nodes) when possible. 2506 We search for consecutive rows with the same nonzero structure, thereby 2507 reusing matrix information to achieve increased efficiency. 2508 2509 Options Database Keys: 2510 + -mat_aij_no_inode - Do not use inodes 2511 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2512 - -mat_aij_oneindex - Internally use indexing starting at 1 2513 rather than 0. Note that when calling MatSetValues(), 2514 the user still MUST index entries starting at 0! 2515 2516 2517 Example usage: 2518 2519 Consider the following 8x8 matrix with 34 non-zero values, that is 2520 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2521 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2522 as follows: 2523 2524 .vb 2525 1 2 0 | 0 3 0 | 0 4 2526 Proc0 0 5 6 | 7 0 0 | 8 0 2527 9 0 10 | 11 0 0 | 12 0 2528 ------------------------------------- 2529 13 0 14 | 15 16 17 | 0 0 2530 Proc1 0 18 0 | 19 20 21 | 0 0 2531 0 0 0 | 22 23 0 | 24 0 2532 ------------------------------------- 2533 Proc2 25 26 27 | 0 0 28 | 29 0 2534 30 0 0 | 31 32 33 | 0 34 2535 .ve 2536 2537 This can be represented as a collection of submatrices as: 2538 2539 .vb 2540 A B C 2541 D E F 2542 G H I 2543 .ve 2544 2545 Where the submatrices A,B,C are owned by proc0, D,E,F are 2546 owned by proc1, G,H,I are owned by proc2. 2547 2548 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2549 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2550 The 'M','N' parameters are 8,8, and have the same values on all procs. 2551 2552 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2553 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2554 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2555 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2556 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2557 matrix, ans [DF] as another SeqAIJ matrix. 2558 2559 When d_nz, o_nz parameters are specified, d_nz storage elements are 2560 allocated for every row of the local diagonal submatrix, and o_nz 2561 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2562 One way to choose d_nz and o_nz is to use the max nonzerors per local 2563 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2564 In this case, the values of d_nz,o_nz are: 2565 .vb 2566 proc0 : dnz = 2, o_nz = 2 2567 proc1 : dnz = 3, o_nz = 2 2568 proc2 : dnz = 1, o_nz = 4 2569 .ve 2570 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2571 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2572 for proc3. i.e we are using 12+15+10=37 storage locations to store 2573 34 values. 2574 2575 When d_nnz, o_nnz parameters are specified, the storage is specified 2576 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2577 In the above case the values for d_nnz,o_nnz are: 2578 .vb 2579 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2580 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2581 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2582 .ve 2583 Here the space allocated is sum of all the above values i.e 34, and 2584 hence pre-allocation is perfect. 2585 2586 Level: intermediate 2587 2588 .keywords: matrix, aij, compressed row, sparse, parallel 2589 2590 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2591 MPIAIJ 2592 @*/ 2593 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2594 { 2595 PetscErrorCode ierr; 2596 PetscMPIInt size; 2597 2598 PetscFunctionBegin; 2599 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2600 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2601 if (size > 1) { 2602 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2603 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2604 } else { 2605 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2606 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2607 } 2608 PetscFunctionReturn(0); 2609 } 2610 2611 #undef __FUNCT__ 2612 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2613 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2614 { 2615 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2616 2617 PetscFunctionBegin; 2618 *Ad = a->A; 2619 *Ao = a->B; 2620 *colmap = a->garray; 2621 PetscFunctionReturn(0); 2622 } 2623 2624 #undef __FUNCT__ 2625 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2626 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2627 { 2628 PetscErrorCode ierr; 2629 PetscInt i; 2630 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2631 2632 PetscFunctionBegin; 2633 if (coloring->ctype == IS_COLORING_LOCAL) { 2634 ISColoringValue *allcolors,*colors; 2635 ISColoring ocoloring; 2636 2637 /* set coloring for diagonal portion */ 2638 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2639 2640 /* set coloring for off-diagonal portion */ 2641 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2642 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2643 for (i=0; i<a->B->n; i++) { 2644 colors[i] = allcolors[a->garray[i]]; 2645 } 2646 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2647 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2648 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2649 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2650 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2651 ISColoringValue *colors; 2652 PetscInt *larray; 2653 ISColoring ocoloring; 2654 2655 /* set coloring for diagonal portion */ 2656 ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2657 for (i=0; i<a->A->n; i++) { 2658 larray[i] = i + a->cstart; 2659 } 2660 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2661 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2662 for (i=0; i<a->A->n; i++) { 2663 colors[i] = coloring->colors[larray[i]]; 2664 } 2665 ierr = PetscFree(larray);CHKERRQ(ierr); 2666 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2667 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2668 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2669 2670 /* set coloring for off-diagonal portion */ 2671 ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2672 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2673 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2674 for (i=0; i<a->B->n; i++) { 2675 colors[i] = coloring->colors[larray[i]]; 2676 } 2677 ierr = PetscFree(larray);CHKERRQ(ierr); 2678 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2679 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2680 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2681 } else { 2682 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 2683 } 2684 2685 PetscFunctionReturn(0); 2686 } 2687 2688 #if defined(PETSC_HAVE_ADIC) 2689 #undef __FUNCT__ 2690 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2691 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2692 { 2693 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2694 PetscErrorCode ierr; 2695 2696 PetscFunctionBegin; 2697 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2698 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2699 PetscFunctionReturn(0); 2700 } 2701 #endif 2702 2703 #undef __FUNCT__ 2704 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2705 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 2706 { 2707 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2708 PetscErrorCode ierr; 2709 2710 PetscFunctionBegin; 2711 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2712 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2713 PetscFunctionReturn(0); 2714 } 2715 2716 #undef __FUNCT__ 2717 #define __FUNCT__ "MatMerge" 2718 /*@C 2719 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2720 matrices from each processor 2721 2722 Collective on MPI_Comm 2723 2724 Input Parameters: 2725 + comm - the communicators the parallel matrix will live on 2726 . inmat - the input sequential matrices 2727 . n - number of local columns (or PETSC_DECIDE) 2728 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2729 2730 Output Parameter: 2731 . outmat - the parallel matrix generated 2732 2733 Level: advanced 2734 2735 Notes: The number of columns of the matrix in EACH processor MUST be the same. 2736 2737 @*/ 2738 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2739 { 2740 PetscErrorCode ierr; 2741 PetscInt m,N,i,rstart,nnz,I,*dnz,*onz; 2742 PetscInt *indx; 2743 PetscScalar *values; 2744 PetscMap columnmap,rowmap; 2745 2746 PetscFunctionBegin; 2747 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2748 /* 2749 PetscMPIInt rank; 2750 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2751 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N); 2752 */ 2753 if (scall == MAT_INITIAL_MATRIX){ 2754 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2755 if (n == PETSC_DECIDE){ 2756 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2757 ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr); 2758 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2759 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2760 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2761 } 2762 2763 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2764 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2765 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2766 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2767 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2768 2769 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2770 for (i=0;i<m;i++) { 2771 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2772 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2773 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2774 } 2775 /* This routine will ONLY return MPIAIJ type matrix */ 2776 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr); 2777 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2778 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2779 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2780 2781 } else if (scall == MAT_REUSE_MATRIX){ 2782 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2783 } else { 2784 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 2785 } 2786 2787 for (i=0;i<m;i++) { 2788 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2789 I = i + rstart; 2790 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2791 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2792 } 2793 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2794 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2795 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2796 2797 PetscFunctionReturn(0); 2798 } 2799 2800 #undef __FUNCT__ 2801 #define __FUNCT__ "MatFileSplit" 2802 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2803 { 2804 PetscErrorCode ierr; 2805 PetscMPIInt rank; 2806 PetscInt m,N,i,rstart,nnz; 2807 size_t len; 2808 const PetscInt *indx; 2809 PetscViewer out; 2810 char *name; 2811 Mat B; 2812 const PetscScalar *values; 2813 2814 PetscFunctionBegin; 2815 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2816 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2817 /* Should this be the type of the diagonal block of A? */ 2818 ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr); 2819 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2820 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2821 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2822 for (i=0;i<m;i++) { 2823 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2824 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2825 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2826 } 2827 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2828 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2829 2830 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2831 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2832 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2833 sprintf(name,"%s.%d",outfile,rank); 2834 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr); 2835 ierr = PetscFree(name); 2836 ierr = MatView(B,out);CHKERRQ(ierr); 2837 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2838 ierr = MatDestroy(B);CHKERRQ(ierr); 2839 PetscFunctionReturn(0); 2840 } 2841 2842 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 2843 #undef __FUNCT__ 2844 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 2845 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 2846 { 2847 PetscErrorCode ierr; 2848 Mat_Merge_SeqsToMPI *merge; 2849 PetscObjectContainer container; 2850 2851 PetscFunctionBegin; 2852 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2853 if (container) { 2854 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2855 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 2856 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 2857 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 2858 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 2859 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 2860 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 2861 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 2862 ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr); 2863 if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);} 2864 if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);} 2865 if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);} 2866 2867 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 2868 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 2869 } 2870 ierr = PetscFree(merge);CHKERRQ(ierr); 2871 2872 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 2873 PetscFunctionReturn(0); 2874 } 2875 2876 #include "src/mat/utils/freespace.h" 2877 #include "petscbt.h" 2878 #undef __FUNCT__ 2879 #define __FUNCT__ "MatMerge_SeqsToMPI" 2880 /*@C 2881 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 2882 matrices from each processor 2883 2884 Collective on MPI_Comm 2885 2886 Input Parameters: 2887 + comm - the communicators the parallel matrix will live on 2888 . seqmat - the input sequential matrices 2889 . m - number of local rows (or PETSC_DECIDE) 2890 . n - number of local columns (or PETSC_DECIDE) 2891 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2892 2893 Output Parameter: 2894 . mpimat - the parallel matrix generated 2895 2896 Level: advanced 2897 2898 Notes: 2899 The dimensions of the sequential matrix in each processor MUST be the same. 2900 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 2901 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 2902 @*/ 2903 static PetscEvent logkey_seqstompinum = 0; 2904 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 2905 { 2906 PetscErrorCode ierr; 2907 MPI_Comm comm=mpimat->comm; 2908 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2909 PetscMPIInt size,rank,taga,*len_s; 2910 PetscInt N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j; 2911 PetscInt proc,m; 2912 PetscInt **buf_ri,**buf_rj; 2913 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 2914 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 2915 MPI_Request *s_waits,*r_waits; 2916 MPI_Status *status; 2917 MatScalar *aa=a->a,**abuf_r,*ba_i; 2918 Mat_Merge_SeqsToMPI *merge; 2919 PetscObjectContainer container; 2920 2921 PetscFunctionBegin; 2922 if (!logkey_seqstompinum) { 2923 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 2924 } 2925 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 2926 2927 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2928 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2929 2930 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2931 if (container) { 2932 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2933 } 2934 bi = merge->bi; 2935 bj = merge->bj; 2936 buf_ri = merge->buf_ri; 2937 buf_rj = merge->buf_rj; 2938 2939 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 2940 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 2941 len_s = merge->len_s; 2942 2943 /* send and recv matrix values */ 2944 /*-----------------------------*/ 2945 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 2946 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 2947 2948 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 2949 for (proc=0,k=0; proc<size; proc++){ 2950 if (!len_s[proc]) continue; 2951 i = owners[proc]; 2952 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 2953 k++; 2954 } 2955 2956 ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr); 2957 ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr); 2958 ierr = PetscFree(status);CHKERRQ(ierr); 2959 2960 ierr = PetscFree(s_waits);CHKERRQ(ierr); 2961 ierr = PetscFree(r_waits);CHKERRQ(ierr); 2962 2963 /* insert mat values of mpimat */ 2964 /*----------------------------*/ 2965 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 2966 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 2967 nextrow = buf_ri_k + merge->nrecv; 2968 nextai = nextrow + merge->nrecv; 2969 2970 for (k=0; k<merge->nrecv; k++){ 2971 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 2972 nrows = *(buf_ri_k[k]); 2973 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 2974 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 2975 } 2976 2977 /* set values of ba */ 2978 ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); 2979 for (i=0; i<m; i++) { 2980 arow = owners[rank] + i; 2981 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 2982 bnzi = bi[i+1] - bi[i]; 2983 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 2984 2985 /* add local non-zero vals of this proc's seqmat into ba */ 2986 anzi = ai[arow+1] - ai[arow]; 2987 aj = a->j + ai[arow]; 2988 aa = a->a + ai[arow]; 2989 nextaj = 0; 2990 for (j=0; nextaj<anzi; j++){ 2991 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 2992 ba_i[j] += aa[nextaj++]; 2993 } 2994 } 2995 2996 /* add received vals into ba */ 2997 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 2998 /* i-th row */ 2999 if (i == *nextrow[k]) { 3000 anzi = *(nextai[k]+1) - *nextai[k]; 3001 aj = buf_rj[k] + *(nextai[k]); 3002 aa = abuf_r[k] + *(nextai[k]); 3003 nextaj = 0; 3004 for (j=0; nextaj<anzi; j++){ 3005 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3006 ba_i[j] += aa[nextaj++]; 3007 } 3008 } 3009 nextrow[k]++; nextai[k]++; 3010 } 3011 } 3012 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3013 } 3014 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3015 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3016 3017 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3018 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3019 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3020 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3021 PetscFunctionReturn(0); 3022 } 3023 static PetscEvent logkey_seqstompisym = 0; 3024 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3025 { 3026 PetscErrorCode ierr; 3027 Mat B_mpi; 3028 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3029 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3030 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3031 PetscInt M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j; 3032 PetscInt len,proc,*dnz,*onz; 3033 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3034 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3035 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3036 MPI_Status *status; 3037 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3038 PetscBT lnkbt; 3039 Mat_Merge_SeqsToMPI *merge; 3040 PetscObjectContainer container; 3041 3042 PetscFunctionBegin; 3043 if (!logkey_seqstompisym) { 3044 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3045 } 3046 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3047 3048 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3049 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3050 3051 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3052 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3053 3054 /* determine row ownership */ 3055 /*---------------------------------------------------------*/ 3056 ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr); 3057 if (m == PETSC_DECIDE) { 3058 ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr); 3059 } else { 3060 ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 3061 } 3062 ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr); 3063 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3064 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3065 3066 if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); } 3067 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 3068 3069 /* determine the number of messages to send, their lengths */ 3070 /*---------------------------------------------------------*/ 3071 len_s = merge->len_s; 3072 3073 len = 0; /* length of buf_si[] */ 3074 merge->nsend = 0; 3075 for (proc=0; proc<size; proc++){ 3076 len_si[proc] = 0; 3077 if (proc == rank){ 3078 len_s[proc] = 0; 3079 } else { 3080 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3081 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3082 } 3083 if (len_s[proc]) { 3084 merge->nsend++; 3085 nrows = 0; 3086 for (i=owners[proc]; i<owners[proc+1]; i++){ 3087 if (ai[i+1] > ai[i]) nrows++; 3088 } 3089 len_si[proc] = 2*(nrows+1); 3090 len += len_si[proc]; 3091 } 3092 } 3093 3094 /* determine the number and length of messages to receive for ij-structure */ 3095 /*-------------------------------------------------------------------------*/ 3096 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3097 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3098 3099 /* post the Irecv of j-structure */ 3100 /*-------------------------------*/ 3101 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr); 3102 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3103 3104 /* post the Isend of j-structure */ 3105 /*--------------------------------*/ 3106 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3107 sj_waits = si_waits + merge->nsend; 3108 3109 for (proc=0, k=0; proc<size; proc++){ 3110 if (!len_s[proc]) continue; 3111 i = owners[proc]; 3112 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3113 k++; 3114 } 3115 3116 /* receives and sends of j-structure are complete */ 3117 /*------------------------------------------------*/ 3118 ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr); 3119 ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr); 3120 3121 /* send and recv i-structure */ 3122 /*---------------------------*/ 3123 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr); 3124 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3125 3126 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3127 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3128 for (proc=0,k=0; proc<size; proc++){ 3129 if (!len_s[proc]) continue; 3130 /* form outgoing message for i-structure: 3131 buf_si[0]: nrows to be sent 3132 [1:nrows]: row index (global) 3133 [nrows+1:2*nrows+1]: i-structure index 3134 */ 3135 /*-------------------------------------------*/ 3136 nrows = len_si[proc]/2 - 1; 3137 buf_si_i = buf_si + nrows+1; 3138 buf_si[0] = nrows; 3139 buf_si_i[0] = 0; 3140 nrows = 0; 3141 for (i=owners[proc]; i<owners[proc+1]; i++){ 3142 anzi = ai[i+1] - ai[i]; 3143 if (anzi) { 3144 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3145 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3146 nrows++; 3147 } 3148 } 3149 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3150 k++; 3151 buf_si += len_si[proc]; 3152 } 3153 3154 ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr); 3155 ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr); 3156 3157 ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 3158 for (i=0; i<merge->nrecv; i++){ 3159 ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 3160 } 3161 3162 ierr = PetscFree(len_si);CHKERRQ(ierr); 3163 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3164 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 3165 ierr = PetscFree(si_waits);CHKERRQ(ierr); 3166 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3167 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3168 ierr = PetscFree(status);CHKERRQ(ierr); 3169 3170 /* compute a local seq matrix in each processor */ 3171 /*----------------------------------------------*/ 3172 /* allocate bi array and free space for accumulating nonzero column info */ 3173 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3174 bi[0] = 0; 3175 3176 /* create and initialize a linked list */ 3177 nlnk = N+1; 3178 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3179 3180 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3181 len = 0; 3182 len = ai[owners[rank+1]] - ai[owners[rank]]; 3183 ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3184 current_space = free_space; 3185 3186 /* determine symbolic info for each local row */ 3187 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3188 nextrow = buf_ri_k + merge->nrecv; 3189 nextai = nextrow + merge->nrecv; 3190 for (k=0; k<merge->nrecv; k++){ 3191 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3192 nrows = *buf_ri_k[k]; 3193 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 3194 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3195 } 3196 3197 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3198 len = 0; 3199 for (i=0;i<m;i++) { 3200 bnzi = 0; 3201 /* add local non-zero cols of this proc's seqmat into lnk */ 3202 arow = owners[rank] + i; 3203 anzi = ai[arow+1] - ai[arow]; 3204 aj = a->j + ai[arow]; 3205 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3206 bnzi += nlnk; 3207 /* add received col data into lnk */ 3208 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3209 if (i == *nextrow[k]) { /* i-th row */ 3210 anzi = *(nextai[k]+1) - *nextai[k]; 3211 aj = buf_rj[k] + *nextai[k]; 3212 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3213 bnzi += nlnk; 3214 nextrow[k]++; nextai[k]++; 3215 } 3216 } 3217 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3218 3219 /* if free space is not available, make more free space */ 3220 if (current_space->local_remaining<bnzi) { 3221 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3222 nspacedouble++; 3223 } 3224 /* copy data into free space, then initialize lnk */ 3225 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3226 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3227 3228 current_space->array += bnzi; 3229 current_space->local_used += bnzi; 3230 current_space->local_remaining -= bnzi; 3231 3232 bi[i+1] = bi[i] + bnzi; 3233 } 3234 3235 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3236 3237 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3238 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3239 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3240 3241 /* create symbolic parallel matrix B_mpi */ 3242 /*---------------------------------------*/ 3243 if (n==PETSC_DECIDE) { 3244 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr); 3245 } else { 3246 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr); 3247 } 3248 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3249 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3250 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3251 3252 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3253 B_mpi->assembled = PETSC_FALSE; 3254 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3255 merge->bi = bi; 3256 merge->bj = bj; 3257 merge->buf_ri = buf_ri; 3258 merge->buf_rj = buf_rj; 3259 merge->coi = PETSC_NULL; 3260 merge->coj = PETSC_NULL; 3261 merge->owners_co = PETSC_NULL; 3262 3263 /* attach the supporting struct to B_mpi for reuse */ 3264 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3265 ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr); 3266 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3267 *mpimat = B_mpi; 3268 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3269 PetscFunctionReturn(0); 3270 } 3271 3272 static PetscEvent logkey_seqstompi = 0; 3273 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3274 { 3275 PetscErrorCode ierr; 3276 3277 PetscFunctionBegin; 3278 if (!logkey_seqstompi) { 3279 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3280 } 3281 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3282 if (scall == MAT_INITIAL_MATRIX){ 3283 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3284 } 3285 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3286 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3287 PetscFunctionReturn(0); 3288 } 3289 static PetscEvent logkey_getlocalmat = 0; 3290 #undef __FUNCT__ 3291 #define __FUNCT__ "MatGetLocalMat" 3292 /*@C 3293 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3294 3295 Not Collective 3296 3297 Input Parameters: 3298 + A - the matrix 3299 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3300 3301 Output Parameter: 3302 . A_loc - the local sequential matrix generated 3303 3304 Level: developer 3305 3306 @*/ 3307 PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3308 { 3309 PetscErrorCode ierr; 3310 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3311 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3312 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3313 PetscScalar *aa=a->a,*ba=b->a,*ca; 3314 PetscInt am=A->m,i,j,k,cstart=mpimat->cstart; 3315 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3316 3317 PetscFunctionBegin; 3318 if (!logkey_getlocalmat) { 3319 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3320 } 3321 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3322 if (scall == MAT_INITIAL_MATRIX){ 3323 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 3324 ci[0] = 0; 3325 for (i=0; i<am; i++){ 3326 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3327 } 3328 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3329 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 3330 k = 0; 3331 for (i=0; i<am; i++) { 3332 ncols_o = bi[i+1] - bi[i]; 3333 ncols_d = ai[i+1] - ai[i]; 3334 /* off-diagonal portion of A */ 3335 for (jo=0; jo<ncols_o; jo++) { 3336 col = cmap[*bj]; 3337 if (col >= cstart) break; 3338 cj[k] = col; bj++; 3339 ca[k++] = *ba++; 3340 } 3341 /* diagonal portion of A */ 3342 for (j=0; j<ncols_d; j++) { 3343 cj[k] = cstart + *aj++; 3344 ca[k++] = *aa++; 3345 } 3346 /* off-diagonal portion of A */ 3347 for (j=jo; j<ncols_o; j++) { 3348 cj[k] = cmap[*bj++]; 3349 ca[k++] = *ba++; 3350 } 3351 } 3352 /* put together the new matrix */ 3353 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 3354 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3355 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3356 mat = (Mat_SeqAIJ*)(*A_loc)->data; 3357 mat->freedata = PETSC_TRUE; 3358 mat->nonew = 0; 3359 } else if (scall == MAT_REUSE_MATRIX){ 3360 mat=(Mat_SeqAIJ*)(*A_loc)->data; 3361 ci = mat->i; cj = mat->j; ca = mat->a; 3362 for (i=0; i<am; i++) { 3363 /* off-diagonal portion of A */ 3364 ncols_o = bi[i+1] - bi[i]; 3365 for (jo=0; jo<ncols_o; jo++) { 3366 col = cmap[*bj]; 3367 if (col >= cstart) break; 3368 *ca++ = *ba++; bj++; 3369 } 3370 /* diagonal portion of A */ 3371 ncols_d = ai[i+1] - ai[i]; 3372 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 3373 /* off-diagonal portion of A */ 3374 for (j=jo; j<ncols_o; j++) { 3375 *ca++ = *ba++; bj++; 3376 } 3377 } 3378 } else { 3379 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3380 } 3381 3382 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3383 PetscFunctionReturn(0); 3384 } 3385 3386 static PetscEvent logkey_getlocalmatcondensed = 0; 3387 #undef __FUNCT__ 3388 #define __FUNCT__ "MatGetLocalMatCondensed" 3389 /*@C 3390 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 3391 3392 Not Collective 3393 3394 Input Parameters: 3395 + A - the matrix 3396 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3397 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 3398 3399 Output Parameter: 3400 . A_loc - the local sequential matrix generated 3401 3402 Level: developer 3403 3404 @*/ 3405 PetscErrorCode MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 3406 { 3407 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3408 PetscErrorCode ierr; 3409 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 3410 IS isrowa,iscola; 3411 Mat *aloc; 3412 3413 PetscFunctionBegin; 3414 if (!logkey_getlocalmatcondensed) { 3415 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE); 3416 } 3417 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3418 if (!row){ 3419 start = a->rstart; end = a->rend; 3420 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 3421 } else { 3422 isrowa = *row; 3423 } 3424 if (!col){ 3425 start = a->cstart; 3426 cmap = a->garray; 3427 nzA = a->A->n; 3428 nzB = a->B->n; 3429 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3430 ncols = 0; 3431 for (i=0; i<nzB; i++) { 3432 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3433 else break; 3434 } 3435 imark = i; 3436 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 3437 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 3438 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 3439 ierr = PetscFree(idx);CHKERRQ(ierr); 3440 } else { 3441 iscola = *col; 3442 } 3443 if (scall != MAT_INITIAL_MATRIX){ 3444 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 3445 aloc[0] = *A_loc; 3446 } 3447 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 3448 *A_loc = aloc[0]; 3449 ierr = PetscFree(aloc);CHKERRQ(ierr); 3450 if (!row){ 3451 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 3452 } 3453 if (!col){ 3454 ierr = ISDestroy(iscola);CHKERRQ(ierr); 3455 } 3456 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3457 PetscFunctionReturn(0); 3458 } 3459 3460 static PetscEvent logkey_GetBrowsOfAcols = 0; 3461 #undef __FUNCT__ 3462 #define __FUNCT__ "MatGetBrowsOfAcols" 3463 /*@C 3464 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 3465 3466 Collective on Mat 3467 3468 Input Parameters: 3469 + A,B - the matrices in mpiaij format 3470 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3471 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 3472 3473 Output Parameter: 3474 + rowb, colb - index sets of rows and columns of B to extract 3475 . brstart - row index of B_seq from which next B->m rows are taken from B's local rows 3476 - B_seq - the sequential matrix generated 3477 3478 Level: developer 3479 3480 @*/ 3481 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 3482 { 3483 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3484 PetscErrorCode ierr; 3485 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 3486 IS isrowb,iscolb; 3487 Mat *bseq; 3488 3489 PetscFunctionBegin; 3490 if (a->cstart != b->rstart || a->cend != b->rend){ 3491 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend); 3492 } 3493 if (!logkey_GetBrowsOfAcols) { 3494 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 3495 } 3496 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3497 3498 if (scall == MAT_INITIAL_MATRIX){ 3499 start = a->cstart; 3500 cmap = a->garray; 3501 nzA = a->A->n; 3502 nzB = a->B->n; 3503 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3504 ncols = 0; 3505 for (i=0; i<nzB; i++) { /* row < local row index */ 3506 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3507 else break; 3508 } 3509 imark = i; 3510 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 3511 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 3512 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 3513 ierr = PetscFree(idx);CHKERRQ(ierr); 3514 *brstart = imark; 3515 ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr); 3516 } else { 3517 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 3518 isrowb = *rowb; iscolb = *colb; 3519 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 3520 bseq[0] = *B_seq; 3521 } 3522 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 3523 *B_seq = bseq[0]; 3524 ierr = PetscFree(bseq);CHKERRQ(ierr); 3525 if (!rowb){ 3526 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 3527 } else { 3528 *rowb = isrowb; 3529 } 3530 if (!colb){ 3531 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 3532 } else { 3533 *colb = iscolb; 3534 } 3535 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3536 PetscFunctionReturn(0); 3537 } 3538 3539 static PetscEvent logkey_GetBrowsOfAocols = 0; 3540 #undef __FUNCT__ 3541 #define __FUNCT__ "MatGetBrowsOfAoCols" 3542 /*@C 3543 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 3544 of the OFF-DIAGONAL portion of local A 3545 3546 Collective on Mat 3547 3548 Input Parameters: 3549 + A,B - the matrices in mpiaij format 3550 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3551 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 3552 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 3553 3554 Output Parameter: 3555 + B_oth - the sequential matrix generated 3556 3557 Level: developer 3558 3559 @*/ 3560 PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 3561 { 3562 VecScatter_MPI_General *gen_to,*gen_from; 3563 PetscErrorCode ierr; 3564 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3565 Mat_SeqAIJ *b_oth; 3566 VecScatter ctx=a->Mvctx; 3567 MPI_Comm comm=ctx->comm; 3568 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank; 3569 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj; 3570 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 3571 PetscInt i,k,l,nrecvs,nsends,nrows,*rrow,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 3572 MPI_Request *rwaits,*swaits; 3573 MPI_Status *sstatus,rstatus; 3574 PetscInt *cols; 3575 PetscScalar *vals; 3576 PetscMPIInt j; 3577 3578 PetscFunctionBegin; 3579 if (a->cstart != b->rstart || a->cend != b->rend){ 3580 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend); 3581 } 3582 if (!logkey_GetBrowsOfAocols) { 3583 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE); 3584 } 3585 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3586 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3587 3588 gen_to = (VecScatter_MPI_General*)ctx->todata; 3589 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 3590 rvalues = gen_from->values; /* holds the length of sending row */ 3591 svalues = gen_to->values; /* holds the length of receiving row */ 3592 nrecvs = gen_from->n; 3593 nsends = gen_to->n; 3594 rwaits = gen_from->requests; 3595 swaits = gen_to->requests; 3596 rrow = gen_from->indices; /* local row index to be received */ 3597 srow = gen_to->indices; /* local row index to be sent */ 3598 rstarts = gen_from->starts; 3599 sstarts = gen_to->starts; 3600 rprocs = gen_from->procs; 3601 sprocs = gen_to->procs; 3602 sstatus = gen_to->sstatus; 3603 3604 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 3605 if (scall == MAT_INITIAL_MATRIX){ 3606 /* i-array */ 3607 /*---------*/ 3608 /* post receives */ 3609 for (i=0; i<nrecvs; i++){ 3610 rowlen = (PetscInt*)rvalues + rstarts[i]; 3611 nrows = rstarts[i+1]-rstarts[i]; 3612 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3613 } 3614 3615 /* pack the outgoing message */ 3616 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 3617 rstartsj = sstartsj + nsends +1; 3618 sstartsj[0] = 0; rstartsj[0] = 0; 3619 len = 0; /* total length of j or a array to be sent */ 3620 k = 0; 3621 for (i=0; i<nsends; i++){ 3622 rowlen = (PetscInt*)svalues + sstarts[i]; 3623 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3624 for (j=0; j<nrows; j++) { 3625 row = srow[k] + b->rowners[rank]; /* global row idx */ 3626 ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 3627 len += rowlen[j]; 3628 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 3629 k++; 3630 } 3631 ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3632 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 3633 } 3634 /* recvs and sends of i-array are completed */ 3635 i = nrecvs; 3636 while (i--) { 3637 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3638 } 3639 if (nsends) { 3640 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr); 3641 } 3642 /* allocate buffers for sending j and a arrays */ 3643 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 3644 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 3645 3646 /* create i-array of B_oth */ 3647 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 3648 b_othi[0] = 0; 3649 len = 0; /* total length of j or a array to be received */ 3650 k = 0; 3651 for (i=0; i<nrecvs; i++){ 3652 rowlen = (PetscInt*)rvalues + rstarts[i]; 3653 nrows = rstarts[i+1]-rstarts[i]; 3654 for (j=0; j<nrows; j++) { 3655 b_othi[k+1] = b_othi[k] + rowlen[j]; 3656 len += rowlen[j]; k++; 3657 } 3658 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 3659 } 3660 3661 /* allocate space for j and a arrrays of B_oth */ 3662 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 3663 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr); 3664 3665 /* j-array */ 3666 /*---------*/ 3667 /* post receives of j-array */ 3668 for (i=0; i<nrecvs; i++){ 3669 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3670 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3671 } 3672 k = 0; 3673 for (i=0; i<nsends; i++){ 3674 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3675 bufJ = bufj+sstartsj[i]; 3676 for (j=0; j<nrows; j++) { 3677 row = srow[k++] + b->rowners[rank]; /* global row idx */ 3678 ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3679 for (l=0; l<ncols; l++){ 3680 *bufJ++ = cols[l]; 3681 } 3682 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3683 } 3684 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3685 } 3686 3687 /* recvs and sends of j-array are completed */ 3688 i = nrecvs; 3689 while (i--) { 3690 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3691 } 3692 if (nsends) { 3693 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr); 3694 } 3695 } else if (scall == MAT_REUSE_MATRIX){ 3696 sstartsj = *startsj; 3697 rstartsj = sstartsj + nsends +1; 3698 bufa = *bufa_ptr; 3699 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 3700 b_otha = b_oth->a; 3701 } else { 3702 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 3703 } 3704 3705 /* a-array */ 3706 /*---------*/ 3707 /* post receives of a-array */ 3708 for (i=0; i<nrecvs; i++){ 3709 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3710 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3711 } 3712 k = 0; 3713 for (i=0; i<nsends; i++){ 3714 nrows = sstarts[i+1]-sstarts[i]; 3715 bufA = bufa+sstartsj[i]; 3716 for (j=0; j<nrows; j++) { 3717 row = srow[k++] + b->rowners[rank]; /* global row idx */ 3718 ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3719 for (l=0; l<ncols; l++){ 3720 *bufA++ = vals[l]; 3721 } 3722 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3723 3724 } 3725 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3726 } 3727 /* recvs and sends of a-array are completed */ 3728 i = nrecvs; 3729 while (i--) { 3730 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3731 } 3732 if (nsends) { 3733 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr); 3734 } 3735 3736 if (scall == MAT_INITIAL_MATRIX){ 3737 /* put together the new matrix */ 3738 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 3739 3740 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3741 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3742 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 3743 b_oth->freedata = PETSC_TRUE; 3744 b_oth->nonew = 0; 3745 3746 ierr = PetscFree(bufj);CHKERRQ(ierr); 3747 if (!startsj || !bufa_ptr){ 3748 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 3749 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 3750 } else { 3751 *startsj = sstartsj; 3752 *bufa_ptr = bufa; 3753 } 3754 } 3755 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3756 3757 PetscFunctionReturn(0); 3758 } 3759 3760 /*MC 3761 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 3762 3763 Options Database Keys: 3764 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 3765 3766 Level: beginner 3767 3768 .seealso: MatCreateMPIAIJ 3769 M*/ 3770 3771 EXTERN_C_BEGIN 3772 #undef __FUNCT__ 3773 #define __FUNCT__ "MatCreate_MPIAIJ" 3774 PetscErrorCode MatCreate_MPIAIJ(Mat B) 3775 { 3776 Mat_MPIAIJ *b; 3777 PetscErrorCode ierr; 3778 PetscInt i; 3779 PetscMPIInt size; 3780 3781 PetscFunctionBegin; 3782 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 3783 3784 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 3785 B->data = (void*)b; 3786 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3787 B->factor = 0; 3788 B->bs = 1; 3789 B->assembled = PETSC_FALSE; 3790 B->mapping = 0; 3791 3792 B->insertmode = NOT_SET_VALUES; 3793 b->size = size; 3794 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 3795 3796 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 3797 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 3798 3799 /* the information in the maps duplicates the information computed below, eventually 3800 we should remove the duplicate information that is not contained in the maps */ 3801 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 3802 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 3803 3804 /* build local table of row and column ownerships */ 3805 ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 3806 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 3807 b->cowners = b->rowners + b->size + 2; 3808 ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3809 b->rowners[0] = 0; 3810 for (i=2; i<=b->size; i++) { 3811 b->rowners[i] += b->rowners[i-1]; 3812 } 3813 b->rstart = b->rowners[b->rank]; 3814 b->rend = b->rowners[b->rank+1]; 3815 ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3816 b->cowners[0] = 0; 3817 for (i=2; i<=b->size; i++) { 3818 b->cowners[i] += b->cowners[i-1]; 3819 } 3820 b->cstart = b->cowners[b->rank]; 3821 b->cend = b->cowners[b->rank+1]; 3822 3823 /* build cache for off array entries formed */ 3824 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 3825 b->donotstash = PETSC_FALSE; 3826 b->colmap = 0; 3827 b->garray = 0; 3828 b->roworiented = PETSC_TRUE; 3829 3830 /* stuff used for matrix vector multiply */ 3831 b->lvec = PETSC_NULL; 3832 b->Mvctx = PETSC_NULL; 3833 3834 /* stuff for MatGetRow() */ 3835 b->rowindices = 0; 3836 b->rowvalues = 0; 3837 b->getrowactive = PETSC_FALSE; 3838 3839 /* Explicitly create 2 MATSEQAIJ matrices. */ 3840 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 3841 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3842 PetscLogObjectParent(B,b->A); 3843 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 3844 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3845 PetscLogObjectParent(B,b->B); 3846 3847 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3848 "MatStoreValues_MPIAIJ", 3849 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 3850 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3851 "MatRetrieveValues_MPIAIJ", 3852 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 3853 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3854 "MatGetDiagonalBlock_MPIAIJ", 3855 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 3856 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3857 "MatIsTranspose_MPIAIJ", 3858 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 3859 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 3860 "MatMPIAIJSetPreallocation_MPIAIJ", 3861 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 3862 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 3863 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 3864 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 3865 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3866 "MatDiagonalScaleLocal_MPIAIJ", 3867 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 3868 PetscFunctionReturn(0); 3869 } 3870 EXTERN_C_END 3871