1 2 #include "src/mat/impls/aij/mpi/mpiaij.h" 3 #include "src/inline/spops.h" 4 5 /* 6 Local utility routine that creates a mapping from the global column 7 number to the local number in the off-diagonal part of the local 8 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 9 a slightly higher hash table cost; without it it is not scalable (each processor 10 has an order N integer array but is fast to acess. 11 */ 12 #undef __FUNCT__ 13 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 14 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat) 15 { 16 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 17 PetscErrorCode ierr; 18 PetscInt n = aij->B->n,i; 19 20 PetscFunctionBegin; 21 #if defined (PETSC_USE_CTABLE) 22 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 23 for (i=0; i<n; i++){ 24 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 25 } 26 #else 27 ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 28 PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt)); 29 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr); 30 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 31 #endif 32 PetscFunctionReturn(0); 33 } 34 35 #define CHUNKSIZE 15 36 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 37 { \ 38 \ 39 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 40 rmax = aimax[row]; nrow = ailen[row]; \ 41 col1 = col - shift; \ 42 \ 43 low = 0; high = nrow; \ 44 while (high-low > 5) { \ 45 t = (low+high)/2; \ 46 if (rp[t] > col) high = t; \ 47 else low = t; \ 48 } \ 49 for (_i=low; _i<high; _i++) { \ 50 if (rp[_i] > col1) break; \ 51 if (rp[_i] == col1) { \ 52 if (addv == ADD_VALUES) ap[_i] += value; \ 53 else ap[_i] = value; \ 54 goto a_noinsert; \ 55 } \ 56 } \ 57 if (nonew == 1) goto a_noinsert; \ 58 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 59 if (nrow >= rmax) { \ 60 /* there is no extra room in row, therefore enlarge */ \ 61 PetscInt new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \ 62 PetscScalar *new_a; \ 63 \ 64 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \ 65 \ 66 /* malloc new storage space */ \ 67 len = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(am+1)*sizeof(PetscInt); \ 68 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 69 new_j = (PetscInt*)(new_a + new_nz); \ 70 new_i = new_j + new_nz; \ 71 \ 72 /* copy over old data into new slots */ \ 73 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \ 74 for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 75 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \ 76 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 77 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 78 len*sizeof(PetscInt));CHKERRQ(ierr); \ 79 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 80 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 81 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 82 /* free up old matrix storage */ \ 83 \ 84 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 85 if (!a->singlemalloc) { \ 86 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 87 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 88 } \ 89 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 90 a->singlemalloc = PETSC_TRUE; \ 91 \ 92 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 93 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 94 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \ 95 a->maxnz += CHUNKSIZE; \ 96 a->reallocs++; \ 97 } \ 98 N = nrow++ - 1; a->nz++; \ 99 /* shift up all the later entries in this row */ \ 100 for (ii=N; ii>=_i; ii--) { \ 101 rp[ii+1] = rp[ii]; \ 102 ap[ii+1] = ap[ii]; \ 103 } \ 104 rp[_i] = col1; \ 105 ap[_i] = value; \ 106 a_noinsert: ; \ 107 ailen[row] = nrow; \ 108 } 109 110 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 111 { \ 112 \ 113 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 114 rmax = bimax[row]; nrow = bilen[row]; \ 115 col1 = col - shift; \ 116 \ 117 low = 0; high = nrow; \ 118 while (high-low > 5) { \ 119 t = (low+high)/2; \ 120 if (rp[t] > col) high = t; \ 121 else low = t; \ 122 } \ 123 for (_i=low; _i<high; _i++) { \ 124 if (rp[_i] > col1) break; \ 125 if (rp[_i] == col1) { \ 126 if (addv == ADD_VALUES) ap[_i] += value; \ 127 else ap[_i] = value; \ 128 goto b_noinsert; \ 129 } \ 130 } \ 131 if (nonew == 1) goto b_noinsert; \ 132 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 133 if (nrow >= rmax) { \ 134 /* there is no extra room in row, therefore enlarge */ \ 135 PetscInt new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 136 PetscScalar *new_a; \ 137 \ 138 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \ 139 \ 140 /* malloc new storage space */ \ 141 len = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(bm+1)*sizeof(PetscInt); \ 142 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 143 new_j = (PetscInt*)(new_a + new_nz); \ 144 new_i = new_j + new_nz; \ 145 \ 146 /* copy over old data into new slots */ \ 147 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 148 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 149 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \ 150 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 151 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 152 len*sizeof(PetscInt));CHKERRQ(ierr); \ 153 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 154 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 155 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 156 /* free up old matrix storage */ \ 157 \ 158 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 159 if (!b->singlemalloc) { \ 160 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 161 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 162 } \ 163 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 164 b->singlemalloc = PETSC_TRUE; \ 165 \ 166 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 167 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 168 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \ 169 b->maxnz += CHUNKSIZE; \ 170 b->reallocs++; \ 171 } \ 172 N = nrow++ - 1; b->nz++; \ 173 /* shift up all the later entries in this row */ \ 174 for (ii=N; ii>=_i; ii--) { \ 175 rp[ii+1] = rp[ii]; \ 176 ap[ii+1] = ap[ii]; \ 177 } \ 178 rp[_i] = col1; \ 179 ap[_i] = value; \ 180 b_noinsert: ; \ 181 bilen[row] = nrow; \ 182 } 183 184 #undef __FUNCT__ 185 #define __FUNCT__ "MatSetValues_MPIAIJ" 186 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 187 { 188 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 189 PetscScalar value; 190 PetscErrorCode ierr; 191 PetscInt i,j,rstart = aij->rstart,rend = aij->rend; 192 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 193 PetscTruth roworiented = aij->roworiented; 194 195 /* Some Variables required in the macro */ 196 Mat A = aij->A; 197 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 198 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 199 PetscScalar *aa = a->a; 200 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 201 Mat B = aij->B; 202 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 203 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 204 PetscScalar *ba = b->a; 205 206 PetscInt *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 207 PetscInt nonew = a->nonew,shift=0; 208 PetscScalar *ap; 209 210 PetscFunctionBegin; 211 for (i=0; i<m; i++) { 212 if (im[i] < 0) continue; 213 #if defined(PETSC_USE_BOPT_g) 214 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1); 215 #endif 216 if (im[i] >= rstart && im[i] < rend) { 217 row = im[i] - rstart; 218 for (j=0; j<n; j++) { 219 if (in[j] >= cstart && in[j] < cend){ 220 col = in[j] - cstart; 221 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 222 if (ignorezeroentries && value == 0.0) continue; 223 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 224 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 225 } else if (in[j] < 0) continue; 226 #if defined(PETSC_USE_BOPT_g) 227 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);} 228 #endif 229 else { 230 if (mat->was_assembled) { 231 if (!aij->colmap) { 232 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 233 } 234 #if defined (PETSC_USE_CTABLE) 235 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 236 col--; 237 #else 238 col = aij->colmap[in[j]] - 1; 239 #endif 240 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 241 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 242 col = in[j]; 243 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 244 B = aij->B; 245 b = (Mat_SeqAIJ*)B->data; 246 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 247 ba = b->a; 248 } 249 } else col = in[j]; 250 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 251 if (ignorezeroentries && value == 0.0) continue; 252 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 253 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 254 } 255 } 256 } else { 257 if (!aij->donotstash) { 258 if (roworiented) { 259 if (ignorezeroentries && v[i*n] == 0.0) continue; 260 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 261 } else { 262 if (ignorezeroentries && v[i] == 0.0) continue; 263 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 264 } 265 } 266 } 267 } 268 PetscFunctionReturn(0); 269 } 270 271 #undef __FUNCT__ 272 #define __FUNCT__ "MatGetValues_MPIAIJ" 273 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 274 { 275 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 276 PetscErrorCode ierr; 277 PetscInt i,j,rstart = aij->rstart,rend = aij->rend; 278 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 279 280 PetscFunctionBegin; 281 for (i=0; i<m; i++) { 282 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); 283 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1); 284 if (idxm[i] >= rstart && idxm[i] < rend) { 285 row = idxm[i] - rstart; 286 for (j=0; j<n; j++) { 287 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 288 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1); 289 if (idxn[j] >= cstart && idxn[j] < cend){ 290 col = idxn[j] - cstart; 291 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 292 } else { 293 if (!aij->colmap) { 294 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 295 } 296 #if defined (PETSC_USE_CTABLE) 297 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 298 col --; 299 #else 300 col = aij->colmap[idxn[j]] - 1; 301 #endif 302 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 303 else { 304 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 305 } 306 } 307 } 308 } else { 309 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 310 } 311 } 312 PetscFunctionReturn(0); 313 } 314 315 #undef __FUNCT__ 316 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 317 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 318 { 319 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 320 PetscErrorCode ierr; 321 PetscInt nstash,reallocs; 322 InsertMode addv; 323 324 PetscFunctionBegin; 325 if (aij->donotstash) { 326 PetscFunctionReturn(0); 327 } 328 329 /* make sure all processors are either in INSERTMODE or ADDMODE */ 330 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 331 if (addv == (ADD_VALUES|INSERT_VALUES)) { 332 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 333 } 334 mat->insertmode = addv; /* in case this processor had no cache */ 335 336 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 337 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 338 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs); 339 PetscFunctionReturn(0); 340 } 341 342 #undef __FUNCT__ 343 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 344 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 345 { 346 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 347 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 348 PetscErrorCode ierr; 349 PetscMPIInt n; 350 PetscInt i,j,rstart,ncols,flg; 351 PetscInt *row,*col,other_disassembled; 352 PetscScalar *val; 353 InsertMode addv = mat->insertmode; 354 355 PetscFunctionBegin; 356 if (!aij->donotstash) { 357 while (1) { 358 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 359 if (!flg) break; 360 361 for (i=0; i<n;) { 362 /* Now identify the consecutive vals belonging to the same row */ 363 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 364 if (j < n) ncols = j-i; 365 else ncols = n-i; 366 /* Now assemble all these values with a single function call */ 367 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 368 i = j; 369 } 370 } 371 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 372 } 373 374 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 375 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 376 377 /* determine if any processor has disassembled, if so we must 378 also disassemble ourselfs, in order that we may reassemble. */ 379 /* 380 if nonzero structure of submatrix B cannot change then we know that 381 no processor disassembled thus we can skip this stuff 382 */ 383 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 384 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 385 if (mat->was_assembled && !other_disassembled) { 386 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 387 /* reaccess the b because aij->B was changed */ 388 b = (Mat_SeqAIJ *)aij->B->data; 389 } 390 } 391 392 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 393 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 394 } 395 396 b->inode.use = PETSC_FALSE; 397 b->compressedrow.use = PETSC_TRUE; 398 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 399 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 400 401 if (aij->rowvalues) { 402 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 403 aij->rowvalues = 0; 404 } 405 406 /* used by MatAXPY() */ 407 a->xtoy = 0; b->xtoy = 0; 408 a->XtoY = 0; b->XtoY = 0; 409 410 PetscFunctionReturn(0); 411 } 412 413 #undef __FUNCT__ 414 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 415 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 416 { 417 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 418 PetscErrorCode ierr; 419 420 PetscFunctionBegin; 421 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 422 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 423 PetscFunctionReturn(0); 424 } 425 426 #undef __FUNCT__ 427 #define __FUNCT__ "MatZeroRows_MPIAIJ" 428 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag) 429 { 430 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 431 PetscErrorCode ierr; 432 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = A->tag; 433 PetscInt i,N,*rows,*owners = l->rowners; 434 PetscInt *nprocs,j,idx,nsends,row; 435 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 436 PetscInt *rvalues,count,base,slen,*source; 437 PetscInt *lens,*lrows,*values,rstart=l->rstart; 438 MPI_Comm comm = A->comm; 439 MPI_Request *send_waits,*recv_waits; 440 MPI_Status recv_status,*send_status; 441 IS istmp; 442 PetscTruth found; 443 444 PetscFunctionBegin; 445 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 446 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 447 448 /* first count number of contributors to each processor */ 449 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 450 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 451 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 452 for (i=0; i<N; i++) { 453 idx = rows[i]; 454 found = PETSC_FALSE; 455 for (j=0; j<size; j++) { 456 if (idx >= owners[j] && idx < owners[j+1]) { 457 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 458 } 459 } 460 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 461 } 462 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 463 464 /* inform other processors of number of messages and max length*/ 465 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 466 467 /* post receives: */ 468 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 469 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 470 for (i=0; i<nrecvs; i++) { 471 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 472 } 473 474 /* do sends: 475 1) starts[i] gives the starting index in svalues for stuff going to 476 the ith processor 477 */ 478 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 479 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 480 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 481 starts[0] = 0; 482 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 483 for (i=0; i<N; i++) { 484 svalues[starts[owner[i]]++] = rows[i]; 485 } 486 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 487 488 starts[0] = 0; 489 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 490 count = 0; 491 for (i=0; i<size; i++) { 492 if (nprocs[2*i+1]) { 493 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 494 } 495 } 496 ierr = PetscFree(starts);CHKERRQ(ierr); 497 498 base = owners[rank]; 499 500 /* wait on receives */ 501 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr); 502 source = lens + nrecvs; 503 count = nrecvs; slen = 0; 504 while (count) { 505 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 506 /* unpack receives into our local space */ 507 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 508 source[imdex] = recv_status.MPI_SOURCE; 509 lens[imdex] = n; 510 slen += n; 511 count--; 512 } 513 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 514 515 /* move the data into the send scatter */ 516 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 517 count = 0; 518 for (i=0; i<nrecvs; i++) { 519 values = rvalues + i*nmax; 520 for (j=0; j<lens[i]; j++) { 521 lrows[count++] = values[j] - base; 522 } 523 } 524 ierr = PetscFree(rvalues);CHKERRQ(ierr); 525 ierr = PetscFree(lens);CHKERRQ(ierr); 526 ierr = PetscFree(owner);CHKERRQ(ierr); 527 ierr = PetscFree(nprocs);CHKERRQ(ierr); 528 529 /* actually zap the local rows */ 530 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 531 PetscLogObjectParent(A,istmp); 532 533 /* 534 Zero the required rows. If the "diagonal block" of the matrix 535 is square and the user wishes to set the diagonal we use seperate 536 code so that MatSetValues() is not called for each diagonal allocating 537 new memory, thus calling lots of mallocs and slowing things down. 538 539 Contributed by: Mathew Knepley 540 */ 541 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 542 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 543 if (diag && (l->A->M == l->A->N)) { 544 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 545 } else if (diag) { 546 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 547 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 548 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 549 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 550 } 551 for (i = 0; i < slen; i++) { 552 row = lrows[i] + rstart; 553 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 554 } 555 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 556 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 557 } else { 558 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 559 } 560 ierr = ISDestroy(istmp);CHKERRQ(ierr); 561 ierr = PetscFree(lrows);CHKERRQ(ierr); 562 563 /* wait on sends */ 564 if (nsends) { 565 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 566 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 567 ierr = PetscFree(send_status);CHKERRQ(ierr); 568 } 569 ierr = PetscFree(send_waits);CHKERRQ(ierr); 570 ierr = PetscFree(svalues);CHKERRQ(ierr); 571 572 PetscFunctionReturn(0); 573 } 574 575 #undef __FUNCT__ 576 #define __FUNCT__ "MatMult_MPIAIJ" 577 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 578 { 579 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 580 PetscErrorCode ierr; 581 PetscInt nt; 582 583 PetscFunctionBegin; 584 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 585 if (nt != A->n) { 586 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt); 587 } 588 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 589 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 590 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 591 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 592 PetscFunctionReturn(0); 593 } 594 595 #undef __FUNCT__ 596 #define __FUNCT__ "MatMultAdd_MPIAIJ" 597 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 598 { 599 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 600 PetscErrorCode ierr; 601 602 PetscFunctionBegin; 603 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 604 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 605 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 606 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 607 PetscFunctionReturn(0); 608 } 609 610 #undef __FUNCT__ 611 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 612 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 613 { 614 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 615 PetscErrorCode ierr; 616 617 PetscFunctionBegin; 618 /* do nondiagonal part */ 619 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 620 /* send it on its way */ 621 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 622 /* do local part */ 623 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 624 /* receive remote parts: note this assumes the values are not actually */ 625 /* inserted in yy until the next line, which is true for my implementation*/ 626 /* but is not perhaps always true. */ 627 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 628 PetscFunctionReturn(0); 629 } 630 631 EXTERN_C_BEGIN 632 #undef __FUNCT__ 633 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 634 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth tol,PetscTruth *f) 635 { 636 MPI_Comm comm; 637 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 638 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 639 IS Me,Notme; 640 PetscErrorCode ierr; 641 PetscInt M,N,first,last,*notme,i; 642 PetscMPIInt size; 643 644 PetscFunctionBegin; 645 646 /* Easy test: symmetric diagonal block */ 647 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 648 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 649 if (!*f) PetscFunctionReturn(0); 650 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 651 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 652 if (size == 1) PetscFunctionReturn(0); 653 654 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 655 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 656 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 657 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 658 for (i=0; i<first; i++) notme[i] = i; 659 for (i=last; i<M; i++) notme[i-last+first] = i; 660 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 661 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 662 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 663 Aoff = Aoffs[0]; 664 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 665 Boff = Boffs[0]; 666 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 667 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 668 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 669 ierr = ISDestroy(Me);CHKERRQ(ierr); 670 ierr = ISDestroy(Notme);CHKERRQ(ierr); 671 672 PetscFunctionReturn(0); 673 } 674 EXTERN_C_END 675 676 #undef __FUNCT__ 677 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 678 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 679 { 680 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 681 PetscErrorCode ierr; 682 683 PetscFunctionBegin; 684 /* do nondiagonal part */ 685 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 686 /* send it on its way */ 687 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 688 /* do local part */ 689 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 690 /* receive remote parts: note this assumes the values are not actually */ 691 /* inserted in yy until the next line, which is true for my implementation*/ 692 /* but is not perhaps always true. */ 693 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 694 PetscFunctionReturn(0); 695 } 696 697 /* 698 This only works correctly for square matrices where the subblock A->A is the 699 diagonal block 700 */ 701 #undef __FUNCT__ 702 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 703 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 704 { 705 PetscErrorCode ierr; 706 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 707 708 PetscFunctionBegin; 709 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 710 if (a->rstart != a->cstart || a->rend != a->cend) { 711 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 712 } 713 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 714 PetscFunctionReturn(0); 715 } 716 717 #undef __FUNCT__ 718 #define __FUNCT__ "MatScale_MPIAIJ" 719 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A) 720 { 721 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 722 PetscErrorCode ierr; 723 724 PetscFunctionBegin; 725 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 726 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 727 PetscFunctionReturn(0); 728 } 729 730 #undef __FUNCT__ 731 #define __FUNCT__ "MatDestroy_MPIAIJ" 732 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 733 { 734 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 735 PetscErrorCode ierr; 736 737 PetscFunctionBegin; 738 #if defined(PETSC_USE_LOG) 739 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N); 740 #endif 741 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 742 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 743 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 744 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 745 #if defined (PETSC_USE_CTABLE) 746 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 747 #else 748 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 749 #endif 750 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 751 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 752 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 753 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 754 ierr = PetscFree(aij);CHKERRQ(ierr); 755 756 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 757 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 758 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 759 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 760 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 761 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 762 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 763 PetscFunctionReturn(0); 764 } 765 766 #undef __FUNCT__ 767 #define __FUNCT__ "MatView_MPIAIJ_Binary" 768 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 769 { 770 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 771 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 772 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 773 PetscErrorCode ierr; 774 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 775 int fd; 776 PetscInt nz,header[4],*row_lengths,*range,rlen,i; 777 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 778 PetscScalar *column_values; 779 780 PetscFunctionBegin; 781 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 782 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 783 nz = A->nz + B->nz; 784 if (!rank) { 785 header[0] = MAT_FILE_COOKIE; 786 header[1] = mat->M; 787 header[2] = mat->N; 788 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 789 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 790 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 791 /* get largest number of rows any processor has */ 792 rlen = mat->m; 793 ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr); 794 for (i=1; i<size; i++) { 795 rlen = PetscMax(rlen,range[i+1] - range[i]); 796 } 797 } else { 798 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 799 rlen = mat->m; 800 } 801 802 /* load up the local row counts */ 803 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 804 for (i=0; i<mat->m; i++) { 805 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 806 } 807 808 /* store the row lengths to the file */ 809 if (!rank) { 810 MPI_Status status; 811 ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 812 for (i=1; i<size; i++) { 813 rlen = range[i+1] - range[i]; 814 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 815 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 816 } 817 } else { 818 ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 819 } 820 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 821 822 /* load up the local column indices */ 823 nzmax = nz; /* )th processor needs space a largest processor needs */ 824 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 825 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 826 cnt = 0; 827 for (i=0; i<mat->m; i++) { 828 for (j=B->i[i]; j<B->i[i+1]; j++) { 829 if ( (col = garray[B->j[j]]) > cstart) break; 830 column_indices[cnt++] = col; 831 } 832 for (k=A->i[i]; k<A->i[i+1]; k++) { 833 column_indices[cnt++] = A->j[k] + cstart; 834 } 835 for (; j<B->i[i+1]; j++) { 836 column_indices[cnt++] = garray[B->j[j]]; 837 } 838 } 839 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 840 841 /* store the column indices to the file */ 842 if (!rank) { 843 MPI_Status status; 844 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 845 for (i=1; i<size; i++) { 846 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 847 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 848 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 849 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 850 } 851 } else { 852 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 853 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 854 } 855 ierr = PetscFree(column_indices);CHKERRQ(ierr); 856 857 /* load up the local column values */ 858 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 859 cnt = 0; 860 for (i=0; i<mat->m; i++) { 861 for (j=B->i[i]; j<B->i[i+1]; j++) { 862 if ( garray[B->j[j]] > cstart) break; 863 column_values[cnt++] = B->a[j]; 864 } 865 for (k=A->i[i]; k<A->i[i+1]; k++) { 866 column_values[cnt++] = A->a[k]; 867 } 868 for (; j<B->i[i+1]; j++) { 869 column_values[cnt++] = B->a[j]; 870 } 871 } 872 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 873 874 /* store the column values to the file */ 875 if (!rank) { 876 MPI_Status status; 877 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 878 for (i=1; i<size; i++) { 879 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 880 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 881 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 882 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 883 } 884 } else { 885 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 886 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 887 } 888 ierr = PetscFree(column_values);CHKERRQ(ierr); 889 PetscFunctionReturn(0); 890 } 891 892 #undef __FUNCT__ 893 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 894 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 895 { 896 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 897 PetscErrorCode ierr; 898 PetscMPIInt rank = aij->rank,size = aij->size; 899 PetscTruth isdraw,iascii,flg,isbinary; 900 PetscViewer sviewer; 901 PetscViewerFormat format; 902 903 PetscFunctionBegin; 904 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 905 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 906 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 907 if (iascii) { 908 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 909 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 910 MatInfo info; 911 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 912 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 913 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 914 if (flg) { 915 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 916 rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 917 } else { 918 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 919 rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 920 } 921 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 922 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 923 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 924 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 925 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 926 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 927 PetscFunctionReturn(0); 928 } else if (format == PETSC_VIEWER_ASCII_INFO) { 929 PetscFunctionReturn(0); 930 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 931 PetscFunctionReturn(0); 932 } 933 } else if (isbinary) { 934 if (size == 1) { 935 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 936 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 937 } else { 938 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 939 } 940 PetscFunctionReturn(0); 941 } else if (isdraw) { 942 PetscDraw draw; 943 PetscTruth isnull; 944 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 945 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 946 } 947 948 if (size == 1) { 949 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 950 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 951 } else { 952 /* assemble the entire matrix onto first processor. */ 953 Mat A; 954 Mat_SeqAIJ *Aloc; 955 PetscInt M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 956 PetscScalar *a; 957 958 if (!rank) { 959 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 960 } else { 961 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 962 } 963 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 964 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 965 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 966 PetscLogObjectParent(mat,A); 967 968 /* copy over the A part */ 969 Aloc = (Mat_SeqAIJ*)aij->A->data; 970 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 971 row = aij->rstart; 972 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 973 for (i=0; i<m; i++) { 974 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 975 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 976 } 977 aj = Aloc->j; 978 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 979 980 /* copy over the B part */ 981 Aloc = (Mat_SeqAIJ*)aij->B->data; 982 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 983 row = aij->rstart; 984 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 985 ct = cols; 986 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 987 for (i=0; i<m; i++) { 988 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 989 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 990 } 991 ierr = PetscFree(ct);CHKERRQ(ierr); 992 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 993 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 994 /* 995 Everyone has to call to draw the matrix since the graphics waits are 996 synchronized across all processors that share the PetscDraw object 997 */ 998 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 999 if (!rank) { 1000 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 1001 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1002 } 1003 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1004 ierr = MatDestroy(A);CHKERRQ(ierr); 1005 } 1006 PetscFunctionReturn(0); 1007 } 1008 1009 #undef __FUNCT__ 1010 #define __FUNCT__ "MatView_MPIAIJ" 1011 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1012 { 1013 PetscErrorCode ierr; 1014 PetscTruth iascii,isdraw,issocket,isbinary; 1015 1016 PetscFunctionBegin; 1017 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1018 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1019 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1020 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1021 if (iascii || isdraw || isbinary || issocket) { 1022 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1023 } else { 1024 SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1025 } 1026 PetscFunctionReturn(0); 1027 } 1028 1029 1030 1031 #undef __FUNCT__ 1032 #define __FUNCT__ "MatRelax_MPIAIJ" 1033 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1034 { 1035 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1036 PetscErrorCode ierr; 1037 Vec bb1; 1038 PetscScalar mone=-1.0; 1039 1040 PetscFunctionBegin; 1041 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1042 1043 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1044 1045 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1046 if (flag & SOR_ZERO_INITIAL_GUESS) { 1047 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1048 its--; 1049 } 1050 1051 while (its--) { 1052 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1053 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1054 1055 /* update rhs: bb1 = bb - B*x */ 1056 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1057 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1058 1059 /* local sweep */ 1060 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1061 CHKERRQ(ierr); 1062 } 1063 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1064 if (flag & SOR_ZERO_INITIAL_GUESS) { 1065 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1066 its--; 1067 } 1068 while (its--) { 1069 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1070 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1071 1072 /* update rhs: bb1 = bb - B*x */ 1073 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1074 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1075 1076 /* local sweep */ 1077 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1078 CHKERRQ(ierr); 1079 } 1080 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1081 if (flag & SOR_ZERO_INITIAL_GUESS) { 1082 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1083 its--; 1084 } 1085 while (its--) { 1086 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1087 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1088 1089 /* update rhs: bb1 = bb - B*x */ 1090 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1091 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1092 1093 /* local sweep */ 1094 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1095 CHKERRQ(ierr); 1096 } 1097 } else { 1098 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1099 } 1100 1101 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1102 PetscFunctionReturn(0); 1103 } 1104 1105 #undef __FUNCT__ 1106 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1107 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1108 { 1109 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1110 Mat A = mat->A,B = mat->B; 1111 PetscErrorCode ierr; 1112 PetscReal isend[5],irecv[5]; 1113 1114 PetscFunctionBegin; 1115 info->block_size = 1.0; 1116 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1117 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1118 isend[3] = info->memory; isend[4] = info->mallocs; 1119 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1120 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1121 isend[3] += info->memory; isend[4] += info->mallocs; 1122 if (flag == MAT_LOCAL) { 1123 info->nz_used = isend[0]; 1124 info->nz_allocated = isend[1]; 1125 info->nz_unneeded = isend[2]; 1126 info->memory = isend[3]; 1127 info->mallocs = isend[4]; 1128 } else if (flag == MAT_GLOBAL_MAX) { 1129 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1130 info->nz_used = irecv[0]; 1131 info->nz_allocated = irecv[1]; 1132 info->nz_unneeded = irecv[2]; 1133 info->memory = irecv[3]; 1134 info->mallocs = irecv[4]; 1135 } else if (flag == MAT_GLOBAL_SUM) { 1136 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1137 info->nz_used = irecv[0]; 1138 info->nz_allocated = irecv[1]; 1139 info->nz_unneeded = irecv[2]; 1140 info->memory = irecv[3]; 1141 info->mallocs = irecv[4]; 1142 } 1143 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1144 info->fill_ratio_needed = 0; 1145 info->factor_mallocs = 0; 1146 info->rows_global = (double)matin->M; 1147 info->columns_global = (double)matin->N; 1148 info->rows_local = (double)matin->m; 1149 info->columns_local = (double)matin->N; 1150 1151 PetscFunctionReturn(0); 1152 } 1153 1154 #undef __FUNCT__ 1155 #define __FUNCT__ "MatSetOption_MPIAIJ" 1156 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op) 1157 { 1158 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1159 PetscErrorCode ierr; 1160 1161 PetscFunctionBegin; 1162 switch (op) { 1163 case MAT_NO_NEW_NONZERO_LOCATIONS: 1164 case MAT_YES_NEW_NONZERO_LOCATIONS: 1165 case MAT_COLUMNS_UNSORTED: 1166 case MAT_COLUMNS_SORTED: 1167 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1168 case MAT_KEEP_ZEROED_ROWS: 1169 case MAT_NEW_NONZERO_LOCATION_ERR: 1170 case MAT_USE_INODES: 1171 case MAT_DO_NOT_USE_INODES: 1172 case MAT_IGNORE_ZERO_ENTRIES: 1173 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1174 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1175 break; 1176 case MAT_ROW_ORIENTED: 1177 a->roworiented = PETSC_TRUE; 1178 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1179 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1180 break; 1181 case MAT_ROWS_SORTED: 1182 case MAT_ROWS_UNSORTED: 1183 case MAT_YES_NEW_DIAGONALS: 1184 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1185 break; 1186 case MAT_COLUMN_ORIENTED: 1187 a->roworiented = PETSC_FALSE; 1188 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1189 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1190 break; 1191 case MAT_IGNORE_OFF_PROC_ENTRIES: 1192 a->donotstash = PETSC_TRUE; 1193 break; 1194 case MAT_NO_NEW_DIAGONALS: 1195 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1196 case MAT_SYMMETRIC: 1197 case MAT_STRUCTURALLY_SYMMETRIC: 1198 case MAT_HERMITIAN: 1199 case MAT_SYMMETRY_ETERNAL: 1200 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1201 break; 1202 case MAT_NOT_SYMMETRIC: 1203 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1204 case MAT_NOT_HERMITIAN: 1205 case MAT_NOT_SYMMETRY_ETERNAL: 1206 break; 1207 default: 1208 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1209 } 1210 PetscFunctionReturn(0); 1211 } 1212 1213 #undef __FUNCT__ 1214 #define __FUNCT__ "MatGetRow_MPIAIJ" 1215 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1216 { 1217 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1218 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1219 PetscErrorCode ierr; 1220 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1221 PetscInt nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1222 PetscInt *cmap,*idx_p; 1223 1224 PetscFunctionBegin; 1225 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1226 mat->getrowactive = PETSC_TRUE; 1227 1228 if (!mat->rowvalues && (idx || v)) { 1229 /* 1230 allocate enough space to hold information from the longest row. 1231 */ 1232 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1233 PetscInt max = 1,tmp; 1234 for (i=0; i<matin->m; i++) { 1235 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1236 if (max < tmp) { max = tmp; } 1237 } 1238 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1239 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1240 } 1241 1242 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1243 lrow = row - rstart; 1244 1245 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1246 if (!v) {pvA = 0; pvB = 0;} 1247 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1248 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1249 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1250 nztot = nzA + nzB; 1251 1252 cmap = mat->garray; 1253 if (v || idx) { 1254 if (nztot) { 1255 /* Sort by increasing column numbers, assuming A and B already sorted */ 1256 PetscInt imark = -1; 1257 if (v) { 1258 *v = v_p = mat->rowvalues; 1259 for (i=0; i<nzB; i++) { 1260 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1261 else break; 1262 } 1263 imark = i; 1264 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1265 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1266 } 1267 if (idx) { 1268 *idx = idx_p = mat->rowindices; 1269 if (imark > -1) { 1270 for (i=0; i<imark; i++) { 1271 idx_p[i] = cmap[cworkB[i]]; 1272 } 1273 } else { 1274 for (i=0; i<nzB; i++) { 1275 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1276 else break; 1277 } 1278 imark = i; 1279 } 1280 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1281 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1282 } 1283 } else { 1284 if (idx) *idx = 0; 1285 if (v) *v = 0; 1286 } 1287 } 1288 *nz = nztot; 1289 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1290 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1291 PetscFunctionReturn(0); 1292 } 1293 1294 #undef __FUNCT__ 1295 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1296 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1297 { 1298 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1299 1300 PetscFunctionBegin; 1301 if (aij->getrowactive == PETSC_FALSE) { 1302 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1303 } 1304 aij->getrowactive = PETSC_FALSE; 1305 PetscFunctionReturn(0); 1306 } 1307 1308 #undef __FUNCT__ 1309 #define __FUNCT__ "MatNorm_MPIAIJ" 1310 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1311 { 1312 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1313 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1314 PetscErrorCode ierr; 1315 PetscInt i,j,cstart = aij->cstart; 1316 PetscReal sum = 0.0; 1317 PetscScalar *v; 1318 1319 PetscFunctionBegin; 1320 if (aij->size == 1) { 1321 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1322 } else { 1323 if (type == NORM_FROBENIUS) { 1324 v = amat->a; 1325 for (i=0; i<amat->nz; i++) { 1326 #if defined(PETSC_USE_COMPLEX) 1327 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1328 #else 1329 sum += (*v)*(*v); v++; 1330 #endif 1331 } 1332 v = bmat->a; 1333 for (i=0; i<bmat->nz; i++) { 1334 #if defined(PETSC_USE_COMPLEX) 1335 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1336 #else 1337 sum += (*v)*(*v); v++; 1338 #endif 1339 } 1340 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1341 *norm = sqrt(*norm); 1342 } else if (type == NORM_1) { /* max column norm */ 1343 PetscReal *tmp,*tmp2; 1344 PetscInt *jj,*garray = aij->garray; 1345 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1346 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1347 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1348 *norm = 0.0; 1349 v = amat->a; jj = amat->j; 1350 for (j=0; j<amat->nz; j++) { 1351 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1352 } 1353 v = bmat->a; jj = bmat->j; 1354 for (j=0; j<bmat->nz; j++) { 1355 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1356 } 1357 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1358 for (j=0; j<mat->N; j++) { 1359 if (tmp2[j] > *norm) *norm = tmp2[j]; 1360 } 1361 ierr = PetscFree(tmp);CHKERRQ(ierr); 1362 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1363 } else if (type == NORM_INFINITY) { /* max row norm */ 1364 PetscReal ntemp = 0.0; 1365 for (j=0; j<aij->A->m; j++) { 1366 v = amat->a + amat->i[j]; 1367 sum = 0.0; 1368 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1369 sum += PetscAbsScalar(*v); v++; 1370 } 1371 v = bmat->a + bmat->i[j]; 1372 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1373 sum += PetscAbsScalar(*v); v++; 1374 } 1375 if (sum > ntemp) ntemp = sum; 1376 } 1377 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1378 } else { 1379 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1380 } 1381 } 1382 PetscFunctionReturn(0); 1383 } 1384 1385 #undef __FUNCT__ 1386 #define __FUNCT__ "MatTranspose_MPIAIJ" 1387 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1388 { 1389 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1390 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1391 PetscErrorCode ierr; 1392 PetscInt M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1393 Mat B; 1394 PetscScalar *array; 1395 1396 PetscFunctionBegin; 1397 if (!matout && M != N) { 1398 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1399 } 1400 1401 ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr); 1402 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1403 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1404 1405 /* copy over the A part */ 1406 Aloc = (Mat_SeqAIJ*)a->A->data; 1407 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1408 row = a->rstart; 1409 for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;} 1410 for (i=0; i<m; i++) { 1411 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1412 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1413 } 1414 aj = Aloc->j; 1415 for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;} 1416 1417 /* copy over the B part */ 1418 Aloc = (Mat_SeqAIJ*)a->B->data; 1419 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1420 row = a->rstart; 1421 ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1422 ct = cols; 1423 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1424 for (i=0; i<m; i++) { 1425 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1426 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1427 } 1428 ierr = PetscFree(ct);CHKERRQ(ierr); 1429 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1430 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1431 if (matout) { 1432 *matout = B; 1433 } else { 1434 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1435 } 1436 PetscFunctionReturn(0); 1437 } 1438 1439 #undef __FUNCT__ 1440 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1441 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1442 { 1443 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1444 Mat a = aij->A,b = aij->B; 1445 PetscErrorCode ierr; 1446 PetscInt s1,s2,s3; 1447 1448 PetscFunctionBegin; 1449 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1450 if (rr) { 1451 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1452 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1453 /* Overlap communication with computation. */ 1454 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1455 } 1456 if (ll) { 1457 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1458 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1459 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1460 } 1461 /* scale the diagonal block */ 1462 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1463 1464 if (rr) { 1465 /* Do a scatter end and then right scale the off-diagonal block */ 1466 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1467 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1468 } 1469 1470 PetscFunctionReturn(0); 1471 } 1472 1473 1474 #undef __FUNCT__ 1475 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1476 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A) 1477 { 1478 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1479 PetscErrorCode ierr; 1480 1481 PetscFunctionBegin; 1482 if (!a->rank) { 1483 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1484 } 1485 PetscFunctionReturn(0); 1486 } 1487 1488 #undef __FUNCT__ 1489 #define __FUNCT__ "MatSetBlockSize_MPIAIJ" 1490 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs) 1491 { 1492 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1493 PetscErrorCode ierr; 1494 1495 PetscFunctionBegin; 1496 ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr); 1497 ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr); 1498 PetscFunctionReturn(0); 1499 } 1500 #undef __FUNCT__ 1501 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1502 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1503 { 1504 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1505 PetscErrorCode ierr; 1506 1507 PetscFunctionBegin; 1508 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1509 PetscFunctionReturn(0); 1510 } 1511 1512 #undef __FUNCT__ 1513 #define __FUNCT__ "MatEqual_MPIAIJ" 1514 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1515 { 1516 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1517 Mat a,b,c,d; 1518 PetscTruth flg; 1519 PetscErrorCode ierr; 1520 1521 PetscFunctionBegin; 1522 a = matA->A; b = matA->B; 1523 c = matB->A; d = matB->B; 1524 1525 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1526 if (flg == PETSC_TRUE) { 1527 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1528 } 1529 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1530 PetscFunctionReturn(0); 1531 } 1532 1533 #undef __FUNCT__ 1534 #define __FUNCT__ "MatCopy_MPIAIJ" 1535 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1536 { 1537 PetscErrorCode ierr; 1538 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1539 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1540 1541 PetscFunctionBegin; 1542 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1543 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1544 /* because of the column compression in the off-processor part of the matrix a->B, 1545 the number of columns in a->B and b->B may be different, hence we cannot call 1546 the MatCopy() directly on the two parts. If need be, we can provide a more 1547 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1548 then copying the submatrices */ 1549 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1550 } else { 1551 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1552 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1553 } 1554 PetscFunctionReturn(0); 1555 } 1556 1557 #undef __FUNCT__ 1558 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1559 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1560 { 1561 PetscErrorCode ierr; 1562 1563 PetscFunctionBegin; 1564 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1565 PetscFunctionReturn(0); 1566 } 1567 1568 #include "petscblaslapack.h" 1569 #undef __FUNCT__ 1570 #define __FUNCT__ "MatAXPY_MPIAIJ" 1571 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str) 1572 { 1573 PetscErrorCode ierr; 1574 PetscInt i; 1575 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1576 PetscBLASInt bnz,one=1; 1577 Mat_SeqAIJ *x,*y; 1578 1579 PetscFunctionBegin; 1580 if (str == SAME_NONZERO_PATTERN) { 1581 x = (Mat_SeqAIJ *)xx->A->data; 1582 y = (Mat_SeqAIJ *)yy->A->data; 1583 bnz = (PetscBLASInt)x->nz; 1584 BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1585 x = (Mat_SeqAIJ *)xx->B->data; 1586 y = (Mat_SeqAIJ *)yy->B->data; 1587 bnz = (PetscBLASInt)x->nz; 1588 BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1589 } else if (str == SUBSET_NONZERO_PATTERN) { 1590 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1591 1592 x = (Mat_SeqAIJ *)xx->B->data; 1593 y = (Mat_SeqAIJ *)yy->B->data; 1594 if (y->xtoy && y->XtoY != xx->B) { 1595 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1596 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1597 } 1598 if (!y->xtoy) { /* get xtoy */ 1599 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1600 y->XtoY = xx->B; 1601 } 1602 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1603 } else { 1604 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1605 } 1606 PetscFunctionReturn(0); 1607 } 1608 1609 /* -------------------------------------------------------------------*/ 1610 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1611 MatGetRow_MPIAIJ, 1612 MatRestoreRow_MPIAIJ, 1613 MatMult_MPIAIJ, 1614 /* 4*/ MatMultAdd_MPIAIJ, 1615 MatMultTranspose_MPIAIJ, 1616 MatMultTransposeAdd_MPIAIJ, 1617 0, 1618 0, 1619 0, 1620 /*10*/ 0, 1621 0, 1622 0, 1623 MatRelax_MPIAIJ, 1624 MatTranspose_MPIAIJ, 1625 /*15*/ MatGetInfo_MPIAIJ, 1626 MatEqual_MPIAIJ, 1627 MatGetDiagonal_MPIAIJ, 1628 MatDiagonalScale_MPIAIJ, 1629 MatNorm_MPIAIJ, 1630 /*20*/ MatAssemblyBegin_MPIAIJ, 1631 MatAssemblyEnd_MPIAIJ, 1632 0, 1633 MatSetOption_MPIAIJ, 1634 MatZeroEntries_MPIAIJ, 1635 /*25*/ MatZeroRows_MPIAIJ, 1636 0, 1637 0, 1638 0, 1639 0, 1640 /*30*/ MatSetUpPreallocation_MPIAIJ, 1641 0, 1642 0, 1643 0, 1644 0, 1645 /*35*/ MatDuplicate_MPIAIJ, 1646 0, 1647 0, 1648 0, 1649 0, 1650 /*40*/ MatAXPY_MPIAIJ, 1651 MatGetSubMatrices_MPIAIJ, 1652 MatIncreaseOverlap_MPIAIJ, 1653 MatGetValues_MPIAIJ, 1654 MatCopy_MPIAIJ, 1655 /*45*/ MatPrintHelp_MPIAIJ, 1656 MatScale_MPIAIJ, 1657 0, 1658 0, 1659 0, 1660 /*50*/ MatSetBlockSize_MPIAIJ, 1661 0, 1662 0, 1663 0, 1664 0, 1665 /*55*/ MatFDColoringCreate_MPIAIJ, 1666 0, 1667 MatSetUnfactored_MPIAIJ, 1668 0, 1669 0, 1670 /*60*/ MatGetSubMatrix_MPIAIJ, 1671 MatDestroy_MPIAIJ, 1672 MatView_MPIAIJ, 1673 MatGetPetscMaps_Petsc, 1674 0, 1675 /*65*/ 0, 1676 0, 1677 0, 1678 0, 1679 0, 1680 /*70*/ 0, 1681 0, 1682 MatSetColoring_MPIAIJ, 1683 MatSetValuesAdic_MPIAIJ, 1684 MatSetValuesAdifor_MPIAIJ, 1685 /*75*/ 0, 1686 0, 1687 0, 1688 0, 1689 0, 1690 /*80*/ 0, 1691 0, 1692 0, 1693 0, 1694 /*84*/ MatLoad_MPIAIJ, 1695 0, 1696 0, 1697 0, 1698 0, 1699 0, 1700 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1701 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1702 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1703 MatPtAP_MPIAIJ_MPIAIJ, 1704 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1705 /*95*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 1706 0, 1707 0, 1708 0}; 1709 1710 /* ----------------------------------------------------------------------------------------*/ 1711 1712 EXTERN_C_BEGIN 1713 #undef __FUNCT__ 1714 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1715 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 1716 { 1717 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1718 PetscErrorCode ierr; 1719 1720 PetscFunctionBegin; 1721 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1722 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1723 PetscFunctionReturn(0); 1724 } 1725 EXTERN_C_END 1726 1727 EXTERN_C_BEGIN 1728 #undef __FUNCT__ 1729 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1730 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 1731 { 1732 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1733 PetscErrorCode ierr; 1734 1735 PetscFunctionBegin; 1736 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1737 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1738 PetscFunctionReturn(0); 1739 } 1740 EXTERN_C_END 1741 1742 #include "petscpc.h" 1743 EXTERN_C_BEGIN 1744 #undef __FUNCT__ 1745 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1746 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1747 { 1748 Mat_MPIAIJ *b; 1749 PetscErrorCode ierr; 1750 PetscInt i; 1751 1752 PetscFunctionBegin; 1753 B->preallocated = PETSC_TRUE; 1754 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1755 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1756 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1757 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1758 if (d_nnz) { 1759 for (i=0; i<B->m; i++) { 1760 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1761 } 1762 } 1763 if (o_nnz) { 1764 for (i=0; i<B->m; i++) { 1765 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1766 } 1767 } 1768 b = (Mat_MPIAIJ*)B->data; 1769 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1770 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1771 1772 PetscFunctionReturn(0); 1773 } 1774 EXTERN_C_END 1775 1776 /*MC 1777 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 1778 1779 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 1780 and MATMPIAIJ otherwise. As a result, for single process communicators, 1781 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 1782 for communicators controlling multiple processes. It is recommended that you call both of 1783 the above preallocation routines for simplicity. 1784 1785 Options Database Keys: 1786 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 1787 1788 Level: beginner 1789 1790 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ 1791 M*/ 1792 1793 EXTERN_C_BEGIN 1794 #undef __FUNCT__ 1795 #define __FUNCT__ "MatCreate_AIJ" 1796 PetscErrorCode MatCreate_AIJ(Mat A) 1797 { 1798 PetscErrorCode ierr; 1799 PetscMPIInt size; 1800 1801 PetscFunctionBegin; 1802 ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr); 1803 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1804 if (size == 1) { 1805 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 1806 } else { 1807 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1808 } 1809 PetscFunctionReturn(0); 1810 } 1811 EXTERN_C_END 1812 1813 #undef __FUNCT__ 1814 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1815 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1816 { 1817 Mat mat; 1818 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1819 PetscErrorCode ierr; 1820 1821 PetscFunctionBegin; 1822 *newmat = 0; 1823 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1824 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1825 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1826 a = (Mat_MPIAIJ*)mat->data; 1827 1828 mat->factor = matin->factor; 1829 mat->bs = matin->bs; 1830 mat->assembled = PETSC_TRUE; 1831 mat->insertmode = NOT_SET_VALUES; 1832 mat->preallocated = PETSC_TRUE; 1833 1834 a->rstart = oldmat->rstart; 1835 a->rend = oldmat->rend; 1836 a->cstart = oldmat->cstart; 1837 a->cend = oldmat->cend; 1838 a->size = oldmat->size; 1839 a->rank = oldmat->rank; 1840 a->donotstash = oldmat->donotstash; 1841 a->roworiented = oldmat->roworiented; 1842 a->rowindices = 0; 1843 a->rowvalues = 0; 1844 a->getrowactive = PETSC_FALSE; 1845 1846 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 1847 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1848 if (oldmat->colmap) { 1849 #if defined (PETSC_USE_CTABLE) 1850 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1851 #else 1852 ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1853 PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt)); 1854 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1855 #endif 1856 } else a->colmap = 0; 1857 if (oldmat->garray) { 1858 PetscInt len; 1859 len = oldmat->B->n; 1860 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1861 PetscLogObjectMemory(mat,len*sizeof(PetscInt)); 1862 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 1863 } else a->garray = 0; 1864 1865 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1866 PetscLogObjectParent(mat,a->lvec); 1867 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1868 PetscLogObjectParent(mat,a->Mvctx); 1869 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1870 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1871 PetscLogObjectParent(mat,a->A); 1872 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1873 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1874 PetscLogObjectParent(mat,a->B); 1875 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1876 *newmat = mat; 1877 PetscFunctionReturn(0); 1878 } 1879 1880 #include "petscsys.h" 1881 1882 #undef __FUNCT__ 1883 #define __FUNCT__ "MatLoad_MPIAIJ" 1884 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 1885 { 1886 Mat A; 1887 PetscScalar *vals,*svals; 1888 MPI_Comm comm = ((PetscObject)viewer)->comm; 1889 MPI_Status status; 1890 PetscErrorCode ierr; 1891 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz; 1892 PetscInt i,nz,j,rstart,rend; 1893 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 1894 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1895 PetscInt cend,cstart,n,*rowners; 1896 int fd; 1897 1898 PetscFunctionBegin; 1899 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1900 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1901 if (!rank) { 1902 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1903 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1904 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1905 } 1906 1907 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1908 M = header[1]; N = header[2]; 1909 /* determine ownership of all rows */ 1910 m = M/size + ((M % size) > rank); 1911 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1912 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 1913 rowners[0] = 0; 1914 for (i=2; i<=size; i++) { 1915 rowners[i] += rowners[i-1]; 1916 } 1917 rstart = rowners[rank]; 1918 rend = rowners[rank+1]; 1919 1920 /* distribute row lengths to all processors */ 1921 ierr = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr); 1922 if (!rank) { 1923 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 1924 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1925 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 1926 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1927 for (j=0; j<m; j++) { 1928 procsnz[0] += ourlens[j]; 1929 } 1930 for (i=1; i<size; i++) { 1931 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 1932 /* calculate the number of nonzeros on each processor */ 1933 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 1934 procsnz[i] += rowlengths[j]; 1935 } 1936 ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1937 } 1938 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1939 } else { 1940 ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1941 } 1942 1943 if (!rank) { 1944 /* determine max buffer needed and allocate it */ 1945 maxnz = 0; 1946 for (i=0; i<size; i++) { 1947 maxnz = PetscMax(maxnz,procsnz[i]); 1948 } 1949 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1950 1951 /* read in my part of the matrix column indices */ 1952 nz = procsnz[0]; 1953 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1954 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1955 1956 /* read in every one elses and ship off */ 1957 for (i=1; i<size; i++) { 1958 nz = procsnz[i]; 1959 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1960 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1961 } 1962 ierr = PetscFree(cols);CHKERRQ(ierr); 1963 } else { 1964 /* determine buffer space needed for message */ 1965 nz = 0; 1966 for (i=0; i<m; i++) { 1967 nz += ourlens[i]; 1968 } 1969 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1970 1971 /* receive message of column indices*/ 1972 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1973 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1974 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1975 } 1976 1977 /* determine column ownership if matrix is not square */ 1978 if (N != M) { 1979 n = N/size + ((N % size) > rank); 1980 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1981 cstart = cend - n; 1982 } else { 1983 cstart = rstart; 1984 cend = rend; 1985 n = cend - cstart; 1986 } 1987 1988 /* loop over local rows, determining number of off diagonal entries */ 1989 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 1990 jj = 0; 1991 for (i=0; i<m; i++) { 1992 for (j=0; j<ourlens[i]; j++) { 1993 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 1994 jj++; 1995 } 1996 } 1997 1998 /* create our matrix */ 1999 for (i=0; i<m; i++) { 2000 ourlens[i] -= offlens[i]; 2001 } 2002 ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr); 2003 ierr = MatSetType(A,type);CHKERRQ(ierr); 2004 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2005 2006 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2007 for (i=0; i<m; i++) { 2008 ourlens[i] += offlens[i]; 2009 } 2010 2011 if (!rank) { 2012 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2013 2014 /* read in my part of the matrix numerical values */ 2015 nz = procsnz[0]; 2016 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2017 2018 /* insert into matrix */ 2019 jj = rstart; 2020 smycols = mycols; 2021 svals = vals; 2022 for (i=0; i<m; i++) { 2023 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2024 smycols += ourlens[i]; 2025 svals += ourlens[i]; 2026 jj++; 2027 } 2028 2029 /* read in other processors and ship out */ 2030 for (i=1; i<size; i++) { 2031 nz = procsnz[i]; 2032 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2033 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2034 } 2035 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2036 } else { 2037 /* receive numeric values */ 2038 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2039 2040 /* receive message of values*/ 2041 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2042 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2043 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2044 2045 /* insert into matrix */ 2046 jj = rstart; 2047 smycols = mycols; 2048 svals = vals; 2049 for (i=0; i<m; i++) { 2050 ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2051 smycols += ourlens[i]; 2052 svals += ourlens[i]; 2053 jj++; 2054 } 2055 } 2056 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2057 ierr = PetscFree(vals);CHKERRQ(ierr); 2058 ierr = PetscFree(mycols);CHKERRQ(ierr); 2059 ierr = PetscFree(rowners);CHKERRQ(ierr); 2060 2061 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2062 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2063 *newmat = A; 2064 PetscFunctionReturn(0); 2065 } 2066 2067 #undef __FUNCT__ 2068 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2069 /* 2070 Not great since it makes two copies of the submatrix, first an SeqAIJ 2071 in local and then by concatenating the local matrices the end result. 2072 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2073 */ 2074 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2075 { 2076 PetscErrorCode ierr; 2077 PetscMPIInt rank,size; 2078 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2079 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2080 Mat *local,M,Mreuse; 2081 PetscScalar *vwork,*aa; 2082 MPI_Comm comm = mat->comm; 2083 Mat_SeqAIJ *aij; 2084 2085 2086 PetscFunctionBegin; 2087 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2088 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2089 2090 if (call == MAT_REUSE_MATRIX) { 2091 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2092 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2093 local = &Mreuse; 2094 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2095 } else { 2096 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2097 Mreuse = *local; 2098 ierr = PetscFree(local);CHKERRQ(ierr); 2099 } 2100 2101 /* 2102 m - number of local rows 2103 n - number of columns (same on all processors) 2104 rstart - first row in new global matrix generated 2105 */ 2106 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2107 if (call == MAT_INITIAL_MATRIX) { 2108 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2109 ii = aij->i; 2110 jj = aij->j; 2111 2112 /* 2113 Determine the number of non-zeros in the diagonal and off-diagonal 2114 portions of the matrix in order to do correct preallocation 2115 */ 2116 2117 /* first get start and end of "diagonal" columns */ 2118 if (csize == PETSC_DECIDE) { 2119 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2120 if (mglobal == n) { /* square matrix */ 2121 nlocal = m; 2122 } else { 2123 nlocal = n/size + ((n % size) > rank); 2124 } 2125 } else { 2126 nlocal = csize; 2127 } 2128 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2129 rstart = rend - nlocal; 2130 if (rank == size - 1 && rend != n) { 2131 SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 2132 } 2133 2134 /* next, compute all the lengths */ 2135 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2136 olens = dlens + m; 2137 for (i=0; i<m; i++) { 2138 jend = ii[i+1] - ii[i]; 2139 olen = 0; 2140 dlen = 0; 2141 for (j=0; j<jend; j++) { 2142 if (*jj < rstart || *jj >= rend) olen++; 2143 else dlen++; 2144 jj++; 2145 } 2146 olens[i] = olen; 2147 dlens[i] = dlen; 2148 } 2149 ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr); 2150 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2151 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2152 ierr = PetscFree(dlens);CHKERRQ(ierr); 2153 } else { 2154 PetscInt ml,nl; 2155 2156 M = *newmat; 2157 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2158 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2159 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2160 /* 2161 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2162 rather than the slower MatSetValues(). 2163 */ 2164 M->was_assembled = PETSC_TRUE; 2165 M->assembled = PETSC_FALSE; 2166 } 2167 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2168 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2169 ii = aij->i; 2170 jj = aij->j; 2171 aa = aij->a; 2172 for (i=0; i<m; i++) { 2173 row = rstart + i; 2174 nz = ii[i+1] - ii[i]; 2175 cwork = jj; jj += nz; 2176 vwork = aa; aa += nz; 2177 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2178 } 2179 2180 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2181 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2182 *newmat = M; 2183 2184 /* save submatrix used in processor for next request */ 2185 if (call == MAT_INITIAL_MATRIX) { 2186 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2187 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2188 } 2189 2190 PetscFunctionReturn(0); 2191 } 2192 2193 EXTERN_C_BEGIN 2194 #undef __FUNCT__ 2195 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2196 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2197 { 2198 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2199 PetscInt m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d; 2200 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii; 2201 const PetscInt *JJ; 2202 PetscScalar *values; 2203 PetscErrorCode ierr; 2204 2205 PetscFunctionBegin; 2206 #if defined(PETSC_OPT_g) 2207 if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]); 2208 #endif 2209 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2210 o_nnz = d_nnz + m; 2211 2212 for (i=0; i<m; i++) { 2213 nnz = I[i+1]- I[i]; 2214 JJ = J + I[i]; 2215 nnz_max = PetscMax(nnz_max,nnz); 2216 #if defined(PETSC_OPT_g) 2217 if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 2218 #endif 2219 for (j=0; j<nnz; j++) { 2220 if (*JJ >= cstart) break; 2221 JJ++; 2222 } 2223 d = 0; 2224 for (; j<nnz; j++) { 2225 if (*JJ++ >= cend) break; 2226 d++; 2227 } 2228 d_nnz[i] = d; 2229 o_nnz[i] = nnz - d; 2230 } 2231 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2232 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2233 2234 if (v) values = (PetscScalar*)v; 2235 else { 2236 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2237 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2238 } 2239 2240 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2241 for (i=0; i<m; i++) { 2242 ii = i + rstart; 2243 nnz = I[i+1]- I[i]; 2244 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr); 2245 } 2246 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2247 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2248 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2249 2250 if (!v) { 2251 ierr = PetscFree(values);CHKERRQ(ierr); 2252 } 2253 PetscFunctionReturn(0); 2254 } 2255 EXTERN_C_END 2256 2257 #undef __FUNCT__ 2258 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2259 /*@C 2260 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2261 (the default parallel PETSc format). 2262 2263 Collective on MPI_Comm 2264 2265 Input Parameters: 2266 + A - the matrix 2267 . i - the indices into j for the start of each local row (starts with zero) 2268 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2269 - v - optional values in the matrix 2270 2271 Level: developer 2272 2273 .keywords: matrix, aij, compressed row, sparse, parallel 2274 2275 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ 2276 @*/ 2277 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2278 { 2279 PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]); 2280 2281 PetscFunctionBegin; 2282 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 2283 if (f) { 2284 ierr = (*f)(B,i,j,v);CHKERRQ(ierr); 2285 } 2286 PetscFunctionReturn(0); 2287 } 2288 2289 #undef __FUNCT__ 2290 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2291 /*@C 2292 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 2293 (the default parallel PETSc format). For good matrix assembly performance 2294 the user should preallocate the matrix storage by setting the parameters 2295 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2296 performance can be increased by more than a factor of 50. 2297 2298 Collective on MPI_Comm 2299 2300 Input Parameters: 2301 + A - the matrix 2302 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2303 (same value is used for all local rows) 2304 . d_nnz - array containing the number of nonzeros in the various rows of the 2305 DIAGONAL portion of the local submatrix (possibly different for each row) 2306 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2307 The size of this array is equal to the number of local rows, i.e 'm'. 2308 You must leave room for the diagonal entry even if it is zero. 2309 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2310 submatrix (same value is used for all local rows). 2311 - o_nnz - array containing the number of nonzeros in the various rows of the 2312 OFF-DIAGONAL portion of the local submatrix (possibly different for 2313 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2314 structure. The size of this array is equal to the number 2315 of local rows, i.e 'm'. 2316 2317 If the *_nnz parameter is given then the *_nz parameter is ignored 2318 2319 The AIJ format (also called the Yale sparse matrix format or 2320 compressed row storage (CSR)), is fully compatible with standard Fortran 77 2321 storage. The stored row and column indices begin with zero. See the users manual for details. 2322 2323 The parallel matrix is partitioned such that the first m0 rows belong to 2324 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2325 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2326 2327 The DIAGONAL portion of the local submatrix of a processor can be defined 2328 as the submatrix which is obtained by extraction the part corresponding 2329 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2330 first row that belongs to the processor, and r2 is the last row belonging 2331 to the this processor. This is a square mxm matrix. The remaining portion 2332 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2333 2334 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2335 2336 Example usage: 2337 2338 Consider the following 8x8 matrix with 34 non-zero values, that is 2339 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2340 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2341 as follows: 2342 2343 .vb 2344 1 2 0 | 0 3 0 | 0 4 2345 Proc0 0 5 6 | 7 0 0 | 8 0 2346 9 0 10 | 11 0 0 | 12 0 2347 ------------------------------------- 2348 13 0 14 | 15 16 17 | 0 0 2349 Proc1 0 18 0 | 19 20 21 | 0 0 2350 0 0 0 | 22 23 0 | 24 0 2351 ------------------------------------- 2352 Proc2 25 26 27 | 0 0 28 | 29 0 2353 30 0 0 | 31 32 33 | 0 34 2354 .ve 2355 2356 This can be represented as a collection of submatrices as: 2357 2358 .vb 2359 A B C 2360 D E F 2361 G H I 2362 .ve 2363 2364 Where the submatrices A,B,C are owned by proc0, D,E,F are 2365 owned by proc1, G,H,I are owned by proc2. 2366 2367 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2368 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2369 The 'M','N' parameters are 8,8, and have the same values on all procs. 2370 2371 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2372 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2373 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2374 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2375 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2376 matrix, ans [DF] as another SeqAIJ matrix. 2377 2378 When d_nz, o_nz parameters are specified, d_nz storage elements are 2379 allocated for every row of the local diagonal submatrix, and o_nz 2380 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2381 One way to choose d_nz and o_nz is to use the max nonzerors per local 2382 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2383 In this case, the values of d_nz,o_nz are: 2384 .vb 2385 proc0 : dnz = 2, o_nz = 2 2386 proc1 : dnz = 3, o_nz = 2 2387 proc2 : dnz = 1, o_nz = 4 2388 .ve 2389 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2390 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2391 for proc3. i.e we are using 12+15+10=37 storage locations to store 2392 34 values. 2393 2394 When d_nnz, o_nnz parameters are specified, the storage is specified 2395 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2396 In the above case the values for d_nnz,o_nnz are: 2397 .vb 2398 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2399 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2400 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2401 .ve 2402 Here the space allocated is sum of all the above values i.e 34, and 2403 hence pre-allocation is perfect. 2404 2405 Level: intermediate 2406 2407 .keywords: matrix, aij, compressed row, sparse, parallel 2408 2409 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(), 2410 MPIAIJ 2411 @*/ 2412 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2413 { 2414 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2415 2416 PetscFunctionBegin; 2417 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2418 if (f) { 2419 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2420 } 2421 PetscFunctionReturn(0); 2422 } 2423 2424 #undef __FUNCT__ 2425 #define __FUNCT__ "MatCreateMPIAIJ" 2426 /*@C 2427 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2428 (the default parallel PETSc format). For good matrix assembly performance 2429 the user should preallocate the matrix storage by setting the parameters 2430 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2431 performance can be increased by more than a factor of 50. 2432 2433 Collective on MPI_Comm 2434 2435 Input Parameters: 2436 + comm - MPI communicator 2437 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2438 This value should be the same as the local size used in creating the 2439 y vector for the matrix-vector product y = Ax. 2440 . n - This value should be the same as the local size used in creating the 2441 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2442 calculated if N is given) For square matrices n is almost always m. 2443 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2444 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2445 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2446 (same value is used for all local rows) 2447 . d_nnz - array containing the number of nonzeros in the various rows of the 2448 DIAGONAL portion of the local submatrix (possibly different for each row) 2449 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2450 The size of this array is equal to the number of local rows, i.e 'm'. 2451 You must leave room for the diagonal entry even if it is zero. 2452 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2453 submatrix (same value is used for all local rows). 2454 - o_nnz - array containing the number of nonzeros in the various rows of the 2455 OFF-DIAGONAL portion of the local submatrix (possibly different for 2456 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2457 structure. The size of this array is equal to the number 2458 of local rows, i.e 'm'. 2459 2460 Output Parameter: 2461 . A - the matrix 2462 2463 Notes: 2464 If the *_nnz parameter is given then the *_nz parameter is ignored 2465 2466 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2467 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2468 storage requirements for this matrix. 2469 2470 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2471 processor than it must be used on all processors that share the object for 2472 that argument. 2473 2474 The user MUST specify either the local or global matrix dimensions 2475 (possibly both). 2476 2477 The parallel matrix is partitioned such that the first m0 rows belong to 2478 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2479 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2480 2481 The DIAGONAL portion of the local submatrix of a processor can be defined 2482 as the submatrix which is obtained by extraction the part corresponding 2483 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2484 first row that belongs to the processor, and r2 is the last row belonging 2485 to the this processor. This is a square mxm matrix. The remaining portion 2486 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2487 2488 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2489 2490 When calling this routine with a single process communicator, a matrix of 2491 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2492 type of communicator, use the construction mechanism: 2493 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 2494 2495 By default, this format uses inodes (identical nodes) when possible. 2496 We search for consecutive rows with the same nonzero structure, thereby 2497 reusing matrix information to achieve increased efficiency. 2498 2499 Options Database Keys: 2500 + -mat_aij_no_inode - Do not use inodes 2501 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2502 - -mat_aij_oneindex - Internally use indexing starting at 1 2503 rather than 0. Note that when calling MatSetValues(), 2504 the user still MUST index entries starting at 0! 2505 2506 2507 Example usage: 2508 2509 Consider the following 8x8 matrix with 34 non-zero values, that is 2510 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2511 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2512 as follows: 2513 2514 .vb 2515 1 2 0 | 0 3 0 | 0 4 2516 Proc0 0 5 6 | 7 0 0 | 8 0 2517 9 0 10 | 11 0 0 | 12 0 2518 ------------------------------------- 2519 13 0 14 | 15 16 17 | 0 0 2520 Proc1 0 18 0 | 19 20 21 | 0 0 2521 0 0 0 | 22 23 0 | 24 0 2522 ------------------------------------- 2523 Proc2 25 26 27 | 0 0 28 | 29 0 2524 30 0 0 | 31 32 33 | 0 34 2525 .ve 2526 2527 This can be represented as a collection of submatrices as: 2528 2529 .vb 2530 A B C 2531 D E F 2532 G H I 2533 .ve 2534 2535 Where the submatrices A,B,C are owned by proc0, D,E,F are 2536 owned by proc1, G,H,I are owned by proc2. 2537 2538 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2539 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2540 The 'M','N' parameters are 8,8, and have the same values on all procs. 2541 2542 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2543 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2544 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2545 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2546 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2547 matrix, ans [DF] as another SeqAIJ matrix. 2548 2549 When d_nz, o_nz parameters are specified, d_nz storage elements are 2550 allocated for every row of the local diagonal submatrix, and o_nz 2551 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2552 One way to choose d_nz and o_nz is to use the max nonzerors per local 2553 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2554 In this case, the values of d_nz,o_nz are: 2555 .vb 2556 proc0 : dnz = 2, o_nz = 2 2557 proc1 : dnz = 3, o_nz = 2 2558 proc2 : dnz = 1, o_nz = 4 2559 .ve 2560 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2561 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2562 for proc3. i.e we are using 12+15+10=37 storage locations to store 2563 34 values. 2564 2565 When d_nnz, o_nnz parameters are specified, the storage is specified 2566 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2567 In the above case the values for d_nnz,o_nnz are: 2568 .vb 2569 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2570 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2571 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2572 .ve 2573 Here the space allocated is sum of all the above values i.e 34, and 2574 hence pre-allocation is perfect. 2575 2576 Level: intermediate 2577 2578 .keywords: matrix, aij, compressed row, sparse, parallel 2579 2580 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2581 MPIAIJ 2582 @*/ 2583 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2584 { 2585 PetscErrorCode ierr; 2586 PetscMPIInt size; 2587 2588 PetscFunctionBegin; 2589 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2590 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2591 if (size > 1) { 2592 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2593 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2594 } else { 2595 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2596 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2597 } 2598 PetscFunctionReturn(0); 2599 } 2600 2601 #undef __FUNCT__ 2602 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2603 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2604 { 2605 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2606 2607 PetscFunctionBegin; 2608 *Ad = a->A; 2609 *Ao = a->B; 2610 *colmap = a->garray; 2611 PetscFunctionReturn(0); 2612 } 2613 2614 #undef __FUNCT__ 2615 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2616 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2617 { 2618 PetscErrorCode ierr; 2619 PetscInt i; 2620 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2621 2622 PetscFunctionBegin; 2623 if (coloring->ctype == IS_COLORING_LOCAL) { 2624 ISColoringValue *allcolors,*colors; 2625 ISColoring ocoloring; 2626 2627 /* set coloring for diagonal portion */ 2628 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2629 2630 /* set coloring for off-diagonal portion */ 2631 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2632 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2633 for (i=0; i<a->B->n; i++) { 2634 colors[i] = allcolors[a->garray[i]]; 2635 } 2636 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2637 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2638 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2639 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2640 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2641 ISColoringValue *colors; 2642 PetscInt *larray; 2643 ISColoring ocoloring; 2644 2645 /* set coloring for diagonal portion */ 2646 ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2647 for (i=0; i<a->A->n; i++) { 2648 larray[i] = i + a->cstart; 2649 } 2650 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2651 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2652 for (i=0; i<a->A->n; i++) { 2653 colors[i] = coloring->colors[larray[i]]; 2654 } 2655 ierr = PetscFree(larray);CHKERRQ(ierr); 2656 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2657 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2658 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2659 2660 /* set coloring for off-diagonal portion */ 2661 ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2662 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2663 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2664 for (i=0; i<a->B->n; i++) { 2665 colors[i] = coloring->colors[larray[i]]; 2666 } 2667 ierr = PetscFree(larray);CHKERRQ(ierr); 2668 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2669 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2670 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2671 } else { 2672 SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 2673 } 2674 2675 PetscFunctionReturn(0); 2676 } 2677 2678 #undef __FUNCT__ 2679 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2680 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2681 { 2682 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2683 PetscErrorCode ierr; 2684 2685 PetscFunctionBegin; 2686 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2687 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2688 PetscFunctionReturn(0); 2689 } 2690 2691 #undef __FUNCT__ 2692 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2693 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 2694 { 2695 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2696 PetscErrorCode ierr; 2697 2698 PetscFunctionBegin; 2699 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2700 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2701 PetscFunctionReturn(0); 2702 } 2703 2704 #undef __FUNCT__ 2705 #define __FUNCT__ "MatMerge" 2706 /*@C 2707 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2708 matrices from each processor 2709 2710 Collective on MPI_Comm 2711 2712 Input Parameters: 2713 + comm - the communicators the parallel matrix will live on 2714 . inmat - the input sequential matrices 2715 . n - number of local columns (or PETSC_DECIDE) 2716 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2717 2718 Output Parameter: 2719 . outmat - the parallel matrix generated 2720 2721 Level: advanced 2722 2723 Notes: The number of columns of the matrix in EACH processor MUST be the same. 2724 2725 @*/ 2726 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2727 { 2728 PetscErrorCode ierr; 2729 PetscInt m,N,i,rstart,nnz,I,*dnz,*onz; 2730 PetscInt *indx; 2731 PetscScalar *values; 2732 PetscMap columnmap,rowmap; 2733 2734 PetscFunctionBegin; 2735 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2736 /* 2737 PetscMPIInt rank; 2738 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2739 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N); 2740 */ 2741 if (scall == MAT_INITIAL_MATRIX){ 2742 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2743 if (n == PETSC_DECIDE){ 2744 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2745 ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr); 2746 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2747 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2748 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2749 } 2750 2751 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2752 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2753 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2754 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2755 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2756 2757 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2758 for (i=0;i<m;i++) { 2759 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2760 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2761 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2762 } 2763 /* This routine will ONLY return MPIAIJ type matrix */ 2764 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr); 2765 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2766 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2767 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2768 2769 } else if (scall == MAT_REUSE_MATRIX){ 2770 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2771 } else { 2772 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 2773 } 2774 2775 for (i=0;i<m;i++) { 2776 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2777 I = i + rstart; 2778 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2779 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2780 } 2781 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2782 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2783 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2784 2785 PetscFunctionReturn(0); 2786 } 2787 2788 #undef __FUNCT__ 2789 #define __FUNCT__ "MatFileSplit" 2790 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2791 { 2792 PetscErrorCode ierr; 2793 PetscMPIInt rank; 2794 PetscInt m,N,i,rstart,nnz; 2795 size_t len; 2796 const PetscInt *indx; 2797 PetscViewer out; 2798 char *name; 2799 Mat B; 2800 const PetscScalar *values; 2801 2802 PetscFunctionBegin; 2803 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2804 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2805 /* Should this be the type of the diagonal block of A? */ 2806 ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr); 2807 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2808 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2809 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2810 for (i=0;i<m;i++) { 2811 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2812 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2813 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2814 } 2815 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2816 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2817 2818 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2819 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2820 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2821 sprintf(name,"%s.%d",outfile,rank); 2822 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr); 2823 ierr = PetscFree(name); 2824 ierr = MatView(B,out);CHKERRQ(ierr); 2825 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2826 ierr = MatDestroy(B);CHKERRQ(ierr); 2827 PetscFunctionReturn(0); 2828 } 2829 2830 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 2831 #undef __FUNCT__ 2832 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 2833 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 2834 { 2835 PetscErrorCode ierr; 2836 Mat_Merge_SeqsToMPI *merge; 2837 PetscObjectContainer container; 2838 2839 PetscFunctionBegin; 2840 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2841 if (container) { 2842 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2843 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 2844 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 2845 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 2846 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 2847 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 2848 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 2849 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 2850 ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr); 2851 if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);} 2852 if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);} 2853 if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);} 2854 2855 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 2856 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 2857 } 2858 ierr = PetscFree(merge);CHKERRQ(ierr); 2859 2860 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 2861 PetscFunctionReturn(0); 2862 } 2863 2864 #include "src/mat/utils/freespace.h" 2865 #include "petscbt.h" 2866 #undef __FUNCT__ 2867 #define __FUNCT__ "MatMerge_SeqsToMPI" 2868 /*@C 2869 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 2870 matrices from each processor 2871 2872 Collective on MPI_Comm 2873 2874 Input Parameters: 2875 + comm - the communicators the parallel matrix will live on 2876 . seqmat - the input sequential matrices 2877 . m - number of local rows (or PETSC_DECIDE) 2878 . n - number of local columns (or PETSC_DECIDE) 2879 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2880 2881 Output Parameter: 2882 . mpimat - the parallel matrix generated 2883 2884 Level: advanced 2885 2886 Notes: 2887 The dimensions of the sequential matrix in each processor MUST be the same. 2888 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 2889 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 2890 @*/ 2891 static PetscEvent logkey_seqstompinum = 0; 2892 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 2893 { 2894 PetscErrorCode ierr; 2895 MPI_Comm comm=mpimat->comm; 2896 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2897 PetscMPIInt size,rank,taga,*len_s; 2898 PetscInt N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j; 2899 PetscInt proc,m; 2900 PetscInt **buf_ri,**buf_rj; 2901 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 2902 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 2903 MPI_Request *s_waits,*r_waits; 2904 MPI_Status *status; 2905 MatScalar *aa=a->a,**abuf_r,*ba_i; 2906 Mat_Merge_SeqsToMPI *merge; 2907 PetscObjectContainer container; 2908 2909 PetscFunctionBegin; 2910 if (!logkey_seqstompinum) { 2911 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 2912 } 2913 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 2914 2915 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2916 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2917 2918 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2919 if (container) { 2920 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2921 } 2922 bi = merge->bi; 2923 bj = merge->bj; 2924 buf_ri = merge->buf_ri; 2925 buf_rj = merge->buf_rj; 2926 2927 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 2928 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 2929 len_s = merge->len_s; 2930 2931 /* send and recv matrix values */ 2932 /*-----------------------------*/ 2933 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 2934 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 2935 2936 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 2937 for (proc=0,k=0; proc<size; proc++){ 2938 if (!len_s[proc]) continue; 2939 i = owners[proc]; 2940 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 2941 k++; 2942 } 2943 2944 ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr); 2945 ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr); 2946 ierr = PetscFree(status);CHKERRQ(ierr); 2947 2948 ierr = PetscFree(s_waits);CHKERRQ(ierr); 2949 ierr = PetscFree(r_waits);CHKERRQ(ierr); 2950 2951 /* insert mat values of mpimat */ 2952 /*----------------------------*/ 2953 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 2954 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 2955 nextrow = buf_ri_k + merge->nrecv; 2956 nextai = nextrow + merge->nrecv; 2957 2958 for (k=0; k<merge->nrecv; k++){ 2959 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 2960 nrows = *(buf_ri_k[k]); 2961 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 2962 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 2963 } 2964 2965 /* set values of ba */ 2966 ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); 2967 for (i=0; i<m; i++) { 2968 arow = owners[rank] + i; 2969 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 2970 bnzi = bi[i+1] - bi[i]; 2971 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 2972 2973 /* add local non-zero vals of this proc's seqmat into ba */ 2974 anzi = ai[arow+1] - ai[arow]; 2975 aj = a->j + ai[arow]; 2976 aa = a->a + ai[arow]; 2977 nextaj = 0; 2978 for (j=0; nextaj<anzi; j++){ 2979 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 2980 ba_i[j] += aa[nextaj++]; 2981 } 2982 } 2983 2984 /* add received vals into ba */ 2985 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 2986 /* i-th row */ 2987 if (i == *nextrow[k]) { 2988 anzi = *(nextai[k]+1) - *nextai[k]; 2989 aj = buf_rj[k] + *(nextai[k]); 2990 aa = abuf_r[k] + *(nextai[k]); 2991 nextaj = 0; 2992 for (j=0; nextaj<anzi; j++){ 2993 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 2994 ba_i[j] += aa[nextaj++]; 2995 } 2996 } 2997 nextrow[k]++; nextai[k]++; 2998 } 2999 } 3000 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3001 } 3002 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3003 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3004 3005 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3006 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3007 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3008 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3009 PetscFunctionReturn(0); 3010 } 3011 static PetscEvent logkey_seqstompisym = 0; 3012 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3013 { 3014 PetscErrorCode ierr; 3015 Mat B_mpi; 3016 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3017 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3018 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3019 PetscInt M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j; 3020 PetscInt len,proc,*dnz,*onz; 3021 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3022 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3023 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3024 MPI_Status *status; 3025 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3026 PetscBT lnkbt; 3027 Mat_Merge_SeqsToMPI *merge; 3028 PetscObjectContainer container; 3029 3030 PetscFunctionBegin; 3031 if (!logkey_seqstompisym) { 3032 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3033 } 3034 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3035 3036 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3037 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3038 3039 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3040 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3041 3042 /* determine row ownership */ 3043 /*---------------------------------------------------------*/ 3044 ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr); 3045 if (m == PETSC_DECIDE) { 3046 ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr); 3047 } else { 3048 ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 3049 } 3050 ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr); 3051 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3052 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3053 3054 if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); } 3055 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 3056 3057 /* determine the number of messages to send, their lengths */ 3058 /*---------------------------------------------------------*/ 3059 len_s = merge->len_s; 3060 3061 len = 0; /* length of buf_si[] */ 3062 merge->nsend = 0; 3063 for (proc=0; proc<size; proc++){ 3064 len_si[proc] = 0; 3065 if (proc == rank){ 3066 len_s[proc] = 0; 3067 } else { 3068 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3069 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3070 } 3071 if (len_s[proc]) { 3072 merge->nsend++; 3073 nrows = 0; 3074 for (i=owners[proc]; i<owners[proc+1]; i++){ 3075 if (ai[i+1] > ai[i]) nrows++; 3076 } 3077 len_si[proc] = 2*(nrows+1); 3078 len += len_si[proc]; 3079 } 3080 } 3081 3082 /* determine the number and length of messages to receive for ij-structure */ 3083 /*-------------------------------------------------------------------------*/ 3084 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3085 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 3086 3087 /* post the Irecv of j-structure */ 3088 /*-------------------------------*/ 3089 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr); 3090 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 3091 3092 /* post the Isend of j-structure */ 3093 /*--------------------------------*/ 3094 ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr); 3095 sj_waits = si_waits + merge->nsend; 3096 3097 for (proc=0, k=0; proc<size; proc++){ 3098 if (!len_s[proc]) continue; 3099 i = owners[proc]; 3100 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 3101 k++; 3102 } 3103 3104 /* receives and sends of j-structure are complete */ 3105 /*------------------------------------------------*/ 3106 ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr); 3107 ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr); 3108 3109 /* send and recv i-structure */ 3110 /*---------------------------*/ 3111 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr); 3112 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 3113 3114 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 3115 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 3116 for (proc=0,k=0; proc<size; proc++){ 3117 if (!len_s[proc]) continue; 3118 /* form outgoing message for i-structure: 3119 buf_si[0]: nrows to be sent 3120 [1:nrows]: row index (global) 3121 [nrows+1:2*nrows+1]: i-structure index 3122 */ 3123 /*-------------------------------------------*/ 3124 nrows = len_si[proc]/2 - 1; 3125 buf_si_i = buf_si + nrows+1; 3126 buf_si[0] = nrows; 3127 buf_si_i[0] = 0; 3128 nrows = 0; 3129 for (i=owners[proc]; i<owners[proc+1]; i++){ 3130 anzi = ai[i+1] - ai[i]; 3131 if (anzi) { 3132 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 3133 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 3134 nrows++; 3135 } 3136 } 3137 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 3138 k++; 3139 buf_si += len_si[proc]; 3140 } 3141 3142 ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr); 3143 ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr); 3144 3145 ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 3146 for (i=0; i<merge->nrecv; i++){ 3147 ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 3148 } 3149 3150 ierr = PetscFree(len_si);CHKERRQ(ierr); 3151 ierr = PetscFree(len_ri);CHKERRQ(ierr); 3152 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 3153 ierr = PetscFree(si_waits);CHKERRQ(ierr); 3154 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 3155 ierr = PetscFree(buf_s);CHKERRQ(ierr); 3156 ierr = PetscFree(status);CHKERRQ(ierr); 3157 3158 /* compute a local seq matrix in each processor */ 3159 /*----------------------------------------------*/ 3160 /* allocate bi array and free space for accumulating nonzero column info */ 3161 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3162 bi[0] = 0; 3163 3164 /* create and initialize a linked list */ 3165 nlnk = N+1; 3166 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3167 3168 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 3169 len = 0; 3170 len = ai[owners[rank+1]] - ai[owners[rank]]; 3171 ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 3172 current_space = free_space; 3173 3174 /* determine symbolic info for each local row */ 3175 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 3176 nextrow = buf_ri_k + merge->nrecv; 3177 nextai = nextrow + merge->nrecv; 3178 for (k=0; k<merge->nrecv; k++){ 3179 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3180 nrows = *buf_ri_k[k]; 3181 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 3182 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 3183 } 3184 3185 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3186 len = 0; 3187 for (i=0;i<m;i++) { 3188 bnzi = 0; 3189 /* add local non-zero cols of this proc's seqmat into lnk */ 3190 arow = owners[rank] + i; 3191 anzi = ai[arow+1] - ai[arow]; 3192 aj = a->j + ai[arow]; 3193 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3194 bnzi += nlnk; 3195 /* add received col data into lnk */ 3196 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3197 if (i == *nextrow[k]) { /* i-th row */ 3198 anzi = *(nextai[k]+1) - *nextai[k]; 3199 aj = buf_rj[k] + *nextai[k]; 3200 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3201 bnzi += nlnk; 3202 nextrow[k]++; nextai[k]++; 3203 } 3204 } 3205 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 3206 3207 /* if free space is not available, make more free space */ 3208 if (current_space->local_remaining<bnzi) { 3209 ierr = GetMoreSpace(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 3210 nspacedouble++; 3211 } 3212 /* copy data into free space, then initialize lnk */ 3213 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3214 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 3215 3216 current_space->array += bnzi; 3217 current_space->local_used += bnzi; 3218 current_space->local_remaining -= bnzi; 3219 3220 bi[i+1] = bi[i] + bnzi; 3221 } 3222 3223 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3224 3225 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3226 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 3227 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3228 3229 /* create symbolic parallel matrix B_mpi */ 3230 /*---------------------------------------*/ 3231 if (n==PETSC_DECIDE) { 3232 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr); 3233 } else { 3234 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr); 3235 } 3236 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 3237 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 3238 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3239 3240 /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */ 3241 B_mpi->assembled = PETSC_FALSE; 3242 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3243 merge->bi = bi; 3244 merge->bj = bj; 3245 merge->buf_ri = buf_ri; 3246 merge->buf_rj = buf_rj; 3247 merge->coi = PETSC_NULL; 3248 merge->coj = PETSC_NULL; 3249 merge->owners_co = PETSC_NULL; 3250 3251 /* attach the supporting struct to B_mpi for reuse */ 3252 ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 3253 ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr); 3254 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 3255 *mpimat = B_mpi; 3256 ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3257 PetscFunctionReturn(0); 3258 } 3259 3260 static PetscEvent logkey_seqstompi = 0; 3261 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 3262 { 3263 PetscErrorCode ierr; 3264 3265 PetscFunctionBegin; 3266 if (!logkey_seqstompi) { 3267 ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE); 3268 } 3269 ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3270 if (scall == MAT_INITIAL_MATRIX){ 3271 ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 3272 } 3273 ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr); 3274 ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 3275 PetscFunctionReturn(0); 3276 } 3277 static PetscEvent logkey_getlocalmat = 0; 3278 #undef __FUNCT__ 3279 #define __FUNCT__ "MatGetLocalMat" 3280 /*@C 3281 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3282 3283 Not Collective 3284 3285 Input Parameters: 3286 + A - the matrix 3287 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3288 3289 Output Parameter: 3290 . A_loc - the local sequential matrix generated 3291 3292 Level: developer 3293 3294 @*/ 3295 PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 3296 { 3297 PetscErrorCode ierr; 3298 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 3299 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 3300 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 3301 PetscScalar *aa=a->a,*ba=b->a,*ca; 3302 PetscInt am=A->m,i,j,k,cstart=mpimat->cstart; 3303 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 3304 3305 PetscFunctionBegin; 3306 if (!logkey_getlocalmat) { 3307 ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE); 3308 } 3309 ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3310 if (scall == MAT_INITIAL_MATRIX){ 3311 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 3312 ci[0] = 0; 3313 for (i=0; i<am; i++){ 3314 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 3315 } 3316 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 3317 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 3318 k = 0; 3319 for (i=0; i<am; i++) { 3320 ncols_o = bi[i+1] - bi[i]; 3321 ncols_d = ai[i+1] - ai[i]; 3322 /* off-diagonal portion of A */ 3323 for (jo=0; jo<ncols_o; jo++) { 3324 col = cmap[*bj]; 3325 if (col >= cstart) break; 3326 cj[k] = col; bj++; 3327 ca[k++] = *ba++; 3328 } 3329 /* diagonal portion of A */ 3330 for (j=0; j<ncols_d; j++) { 3331 cj[k] = cstart + *aj++; 3332 ca[k++] = *aa++; 3333 } 3334 /* off-diagonal portion of A */ 3335 for (j=jo; j<ncols_o; j++) { 3336 cj[k] = cmap[*bj++]; 3337 ca[k++] = *ba++; 3338 } 3339 } 3340 /* put together the new matrix */ 3341 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 3342 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3343 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3344 mat = (Mat_SeqAIJ*)(*A_loc)->data; 3345 mat->freedata = PETSC_TRUE; 3346 mat->nonew = 0; 3347 } else if (scall == MAT_REUSE_MATRIX){ 3348 mat=(Mat_SeqAIJ*)(*A_loc)->data; 3349 ci = mat->i; cj = mat->j; ca = mat->a; 3350 for (i=0; i<am; i++) { 3351 /* off-diagonal portion of A */ 3352 ncols_o = bi[i+1] - bi[i]; 3353 for (jo=0; jo<ncols_o; jo++) { 3354 col = cmap[*bj]; 3355 if (col >= cstart) break; 3356 *ca++ = *ba++; bj++; 3357 } 3358 /* diagonal portion of A */ 3359 ncols_d = ai[i+1] - ai[i]; 3360 for (j=0; j<ncols_d; j++) *ca++ = *aa++; 3361 /* off-diagonal portion of A */ 3362 for (j=jo; j<ncols_o; j++) { 3363 *ca++ = *ba++; bj++; 3364 } 3365 } 3366 } else { 3367 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 3368 } 3369 3370 ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr); 3371 PetscFunctionReturn(0); 3372 } 3373 3374 static PetscEvent logkey_getlocalmatcondensed = 0; 3375 #undef __FUNCT__ 3376 #define __FUNCT__ "MatGetLocalMatCondensed" 3377 /*@C 3378 MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns 3379 3380 Not Collective 3381 3382 Input Parameters: 3383 + A - the matrix 3384 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3385 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 3386 3387 Output Parameter: 3388 . A_loc - the local sequential matrix generated 3389 3390 Level: developer 3391 3392 @*/ 3393 PetscErrorCode MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 3394 { 3395 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3396 PetscErrorCode ierr; 3397 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 3398 IS isrowa,iscola; 3399 Mat *aloc; 3400 3401 PetscFunctionBegin; 3402 if (!logkey_getlocalmatcondensed) { 3403 ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE); 3404 } 3405 ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3406 if (!row){ 3407 start = a->rstart; end = a->rend; 3408 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 3409 } else { 3410 isrowa = *row; 3411 } 3412 if (!col){ 3413 start = a->cstart; 3414 cmap = a->garray; 3415 nzA = a->A->n; 3416 nzB = a->B->n; 3417 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3418 ncols = 0; 3419 for (i=0; i<nzB; i++) { 3420 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3421 else break; 3422 } 3423 imark = i; 3424 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 3425 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 3426 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 3427 ierr = PetscFree(idx);CHKERRQ(ierr); 3428 } else { 3429 iscola = *col; 3430 } 3431 if (scall != MAT_INITIAL_MATRIX){ 3432 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 3433 aloc[0] = *A_loc; 3434 } 3435 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 3436 *A_loc = aloc[0]; 3437 ierr = PetscFree(aloc);CHKERRQ(ierr); 3438 if (!row){ 3439 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 3440 } 3441 if (!col){ 3442 ierr = ISDestroy(iscola);CHKERRQ(ierr); 3443 } 3444 ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 3445 PetscFunctionReturn(0); 3446 } 3447 3448 static PetscEvent logkey_GetBrowsOfAcols = 0; 3449 #undef __FUNCT__ 3450 #define __FUNCT__ "MatGetBrowsOfAcols" 3451 /*@C 3452 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 3453 3454 Collective on Mat 3455 3456 Input Parameters: 3457 + A,B - the matrices in mpiaij format 3458 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3459 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 3460 3461 Output Parameter: 3462 + rowb, colb - index sets of rows and columns of B to extract 3463 . brstart - row index of B_seq from which next B->m rows are taken from B's local rows 3464 - B_seq - the sequential matrix generated 3465 3466 Level: developer 3467 3468 @*/ 3469 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq) 3470 { 3471 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3472 PetscErrorCode ierr; 3473 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 3474 IS isrowb,iscolb; 3475 Mat *bseq; 3476 3477 PetscFunctionBegin; 3478 if (a->cstart != b->rstart || a->cend != b->rend){ 3479 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend); 3480 } 3481 if (!logkey_GetBrowsOfAcols) { 3482 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE); 3483 } 3484 ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3485 3486 if (scall == MAT_INITIAL_MATRIX){ 3487 start = a->cstart; 3488 cmap = a->garray; 3489 nzA = a->A->n; 3490 nzB = a->B->n; 3491 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 3492 ncols = 0; 3493 for (i=0; i<nzB; i++) { /* row < local row index */ 3494 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3495 else break; 3496 } 3497 imark = i; 3498 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 3499 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 3500 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr); 3501 ierr = PetscFree(idx);CHKERRQ(ierr); 3502 *brstart = imark; 3503 ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr); 3504 } else { 3505 if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 3506 isrowb = *rowb; iscolb = *colb; 3507 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 3508 bseq[0] = *B_seq; 3509 } 3510 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 3511 *B_seq = bseq[0]; 3512 ierr = PetscFree(bseq);CHKERRQ(ierr); 3513 if (!rowb){ 3514 ierr = ISDestroy(isrowb);CHKERRQ(ierr); 3515 } else { 3516 *rowb = isrowb; 3517 } 3518 if (!colb){ 3519 ierr = ISDestroy(iscolb);CHKERRQ(ierr); 3520 } else { 3521 *colb = iscolb; 3522 } 3523 ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 3524 PetscFunctionReturn(0); 3525 } 3526 3527 static PetscEvent logkey_GetBrowsOfAocols = 0; 3528 #undef __FUNCT__ 3529 #define __FUNCT__ "MatGetBrowsOfAoCols" 3530 /*@C 3531 MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 3532 of the OFF-DIAGONAL portion of local A 3533 3534 Collective on Mat 3535 3536 Input Parameters: 3537 + A,B - the matrices in mpiaij format 3538 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3539 . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 3540 - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 3541 3542 Output Parameter: 3543 + B_oth - the sequential matrix generated 3544 3545 Level: developer 3546 3547 @*/ 3548 PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth) 3549 { 3550 VecScatter_MPI_General *gen_to,*gen_from; 3551 PetscErrorCode ierr; 3552 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data; 3553 Mat_SeqAIJ *b_oth; 3554 VecScatter ctx=a->Mvctx; 3555 MPI_Comm comm=ctx->comm; 3556 PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank; 3557 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj; 3558 PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA; 3559 PetscInt i,k,l,nrecvs,nsends,nrows,*rrow,*srow,*rstarts,*rstartsj,*sstarts,*sstartsj,len; 3560 MPI_Request *rwaits,*swaits; 3561 MPI_Status *sstatus,rstatus; 3562 PetscInt *cols; 3563 PetscScalar *vals; 3564 PetscMPIInt j; 3565 3566 PetscFunctionBegin; 3567 if (a->cstart != b->rstart || a->cend != b->rend){ 3568 SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend); 3569 } 3570 if (!logkey_GetBrowsOfAocols) { 3571 ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE); 3572 } 3573 ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3574 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3575 3576 gen_to = (VecScatter_MPI_General*)ctx->todata; 3577 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 3578 rvalues = gen_from->values; /* holds the length of sending row */ 3579 svalues = gen_to->values; /* holds the length of receiving row */ 3580 nrecvs = gen_from->n; 3581 nsends = gen_to->n; 3582 rwaits = gen_from->requests; 3583 swaits = gen_to->requests; 3584 rrow = gen_from->indices; /* local row index to be received */ 3585 srow = gen_to->indices; /* local row index to be sent */ 3586 rstarts = gen_from->starts; 3587 sstarts = gen_to->starts; 3588 rprocs = gen_from->procs; 3589 sprocs = gen_to->procs; 3590 sstatus = gen_to->sstatus; 3591 3592 if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 3593 if (scall == MAT_INITIAL_MATRIX){ 3594 /* i-array */ 3595 /*---------*/ 3596 /* post receives */ 3597 for (i=0; i<nrecvs; i++){ 3598 rowlen = (PetscInt*)rvalues + rstarts[i]; 3599 nrows = rstarts[i+1]-rstarts[i]; 3600 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3601 } 3602 3603 /* pack the outgoing message */ 3604 ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr); 3605 rstartsj = sstartsj + nsends +1; 3606 sstartsj[0] = 0; rstartsj[0] = 0; 3607 len = 0; /* total length of j or a array to be sent */ 3608 k = 0; 3609 for (i=0; i<nsends; i++){ 3610 rowlen = (PetscInt*)svalues + sstarts[i]; 3611 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3612 for (j=0; j<nrows; j++) { 3613 row = srow[k] + b->rowners[rank]; /* global row idx */ 3614 ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 3615 len += rowlen[j]; 3616 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 3617 k++; 3618 } 3619 ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3620 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 3621 } 3622 /* recvs and sends of i-array are completed */ 3623 i = nrecvs; 3624 while (i--) { 3625 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3626 } 3627 if (nsends) { 3628 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr); 3629 } 3630 /* allocate buffers for sending j and a arrays */ 3631 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 3632 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 3633 3634 /* create i-array of B_oth */ 3635 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 3636 b_othi[0] = 0; 3637 len = 0; /* total length of j or a array to be received */ 3638 k = 0; 3639 for (i=0; i<nrecvs; i++){ 3640 rowlen = (PetscInt*)rvalues + rstarts[i]; 3641 nrows = rstarts[i+1]-rstarts[i]; 3642 for (j=0; j<nrows; j++) { 3643 b_othi[k+1] = b_othi[k] + rowlen[j]; 3644 len += rowlen[j]; k++; 3645 } 3646 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 3647 } 3648 3649 /* allocate space for j and a arrrays of B_oth */ 3650 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 3651 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr); 3652 3653 /* j-array */ 3654 /*---------*/ 3655 /* post receives of j-array */ 3656 for (i=0; i<nrecvs; i++){ 3657 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3658 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3659 } 3660 k = 0; 3661 for (i=0; i<nsends; i++){ 3662 nrows = sstarts[i+1]-sstarts[i]; /* num of rows */ 3663 bufJ = bufj+sstartsj[i]; 3664 for (j=0; j<nrows; j++) { 3665 row = srow[k++] + b->rowners[rank]; /* global row idx */ 3666 ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3667 for (l=0; l<ncols; l++){ 3668 *bufJ++ = cols[l]; 3669 } 3670 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 3671 } 3672 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3673 } 3674 3675 /* recvs and sends of j-array are completed */ 3676 i = nrecvs; 3677 while (i--) { 3678 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3679 } 3680 if (nsends) { 3681 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr); 3682 } 3683 } else if (scall == MAT_REUSE_MATRIX){ 3684 sstartsj = *startsj; 3685 rstartsj = sstartsj + nsends +1; 3686 bufa = *bufa_ptr; 3687 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 3688 b_otha = b_oth->a; 3689 } else { 3690 SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 3691 } 3692 3693 /* a-array */ 3694 /*---------*/ 3695 /* post receives of a-array */ 3696 for (i=0; i<nrecvs; i++){ 3697 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 3698 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 3699 } 3700 k = 0; 3701 for (i=0; i<nsends; i++){ 3702 nrows = sstarts[i+1]-sstarts[i]; 3703 bufA = bufa+sstartsj[i]; 3704 for (j=0; j<nrows; j++) { 3705 row = srow[k++] + b->rowners[rank]; /* global row idx */ 3706 ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3707 for (l=0; l<ncols; l++){ 3708 *bufA++ = vals[l]; 3709 } 3710 ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 3711 3712 } 3713 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 3714 } 3715 /* recvs and sends of a-array are completed */ 3716 i = nrecvs; 3717 while (i--) { 3718 ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr); 3719 } 3720 if (nsends) { 3721 ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr); 3722 } 3723 3724 if (scall == MAT_INITIAL_MATRIX){ 3725 /* put together the new matrix */ 3726 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 3727 3728 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 3729 /* Since these are PETSc arrays, change flags to free them as necessary. */ 3730 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 3731 b_oth->freedata = PETSC_TRUE; 3732 b_oth->nonew = 0; 3733 3734 ierr = PetscFree(bufj);CHKERRQ(ierr); 3735 if (!startsj || !bufa_ptr){ 3736 ierr = PetscFree(sstartsj);CHKERRQ(ierr); 3737 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 3738 } else { 3739 *startsj = sstartsj; 3740 *bufa_ptr = bufa; 3741 } 3742 } 3743 ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 3744 3745 PetscFunctionReturn(0); 3746 } 3747 3748 /*MC 3749 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 3750 3751 Options Database Keys: 3752 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 3753 3754 Level: beginner 3755 3756 .seealso: MatCreateMPIAIJ 3757 M*/ 3758 3759 EXTERN_C_BEGIN 3760 #undef __FUNCT__ 3761 #define __FUNCT__ "MatCreate_MPIAIJ" 3762 PetscErrorCode MatCreate_MPIAIJ(Mat B) 3763 { 3764 Mat_MPIAIJ *b; 3765 PetscErrorCode ierr; 3766 PetscInt i; 3767 PetscMPIInt size; 3768 3769 PetscFunctionBegin; 3770 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 3771 3772 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 3773 B->data = (void*)b; 3774 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 3775 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3776 B->factor = 0; 3777 B->assembled = PETSC_FALSE; 3778 B->mapping = 0; 3779 3780 B->insertmode = NOT_SET_VALUES; 3781 b->size = size; 3782 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 3783 3784 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 3785 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 3786 3787 /* the information in the maps duplicates the information computed below, eventually 3788 we should remove the duplicate information that is not contained in the maps */ 3789 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 3790 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 3791 3792 /* build local table of row and column ownerships */ 3793 ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 3794 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 3795 b->cowners = b->rowners + b->size + 2; 3796 ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3797 b->rowners[0] = 0; 3798 for (i=2; i<=b->size; i++) { 3799 b->rowners[i] += b->rowners[i-1]; 3800 } 3801 b->rstart = b->rowners[b->rank]; 3802 b->rend = b->rowners[b->rank+1]; 3803 ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 3804 b->cowners[0] = 0; 3805 for (i=2; i<=b->size; i++) { 3806 b->cowners[i] += b->cowners[i-1]; 3807 } 3808 b->cstart = b->cowners[b->rank]; 3809 b->cend = b->cowners[b->rank+1]; 3810 3811 /* build cache for off array entries formed */ 3812 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 3813 b->donotstash = PETSC_FALSE; 3814 b->colmap = 0; 3815 b->garray = 0; 3816 b->roworiented = PETSC_TRUE; 3817 3818 /* stuff used for matrix vector multiply */ 3819 b->lvec = PETSC_NULL; 3820 b->Mvctx = PETSC_NULL; 3821 3822 /* stuff for MatGetRow() */ 3823 b->rowindices = 0; 3824 b->rowvalues = 0; 3825 b->getrowactive = PETSC_FALSE; 3826 3827 /* Explicitly create 2 MATSEQAIJ matrices. */ 3828 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 3829 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3830 PetscLogObjectParent(B,b->A); 3831 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 3832 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3833 PetscLogObjectParent(B,b->B); 3834 3835 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 3836 "MatStoreValues_MPIAIJ", 3837 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 3838 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 3839 "MatRetrieveValues_MPIAIJ", 3840 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 3841 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 3842 "MatGetDiagonalBlock_MPIAIJ", 3843 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 3844 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 3845 "MatIsTranspose_MPIAIJ", 3846 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 3847 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 3848 "MatMPIAIJSetPreallocation_MPIAIJ", 3849 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 3850 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 3851 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 3852 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 3853 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 3854 "MatDiagonalScaleLocal_MPIAIJ", 3855 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 3856 PetscFunctionReturn(0); 3857 } 3858 EXTERN_C_END 3859