#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/inline/spops.h"

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
   a slightly higher hash table cost; without it it is not scalable (each
   processor has an order N integer array) but is fast to access.

   For example (without PETSC_USE_CTABLE), if this processor's off-diagonal
   block has garray = {3,7,12} then colmap[3] = 1, colmap[7] = 2 and
   colmap[12] = 3; entries are stored 1-based so that a value of 0 means the
   global column has no local counterpart.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      PetscInt    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(am+1)*sizeof(PetscInt); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (PetscInt*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow,len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \ 95 a->maxnz += CHUNKSIZE; \ 96 a->reallocs++; \ 97 } \ 98 N = nrow++ - 1; a->nz++; \ 99 /* shift up all the later entries in this row */ \ 100 for (ii=N; ii>=_i; ii--) { \ 101 rp[ii+1] = rp[ii]; \ 102 ap[ii+1] = ap[ii]; \ 103 } \ 104 rp[_i] = col1; \ 105 ap[_i] = value; \ 106 a_noinsert: ; \ 107 ailen[row] = nrow; \ 108 } 109 110 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 111 { \ 112 \ 113 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 114 rmax = bimax[row]; nrow = bilen[row]; \ 115 col1 = col - shift; \ 116 \ 117 low = 0; high = nrow; \ 118 while (high-low > 5) { \ 119 t = (low+high)/2; \ 120 if (rp[t] > col) high = t; \ 121 else low = t; \ 122 } \ 123 for (_i=low; _i<high; _i++) { \ 124 if (rp[_i] > col1) break; \ 125 if (rp[_i] == col1) { \ 126 if (addv == ADD_VALUES) ap[_i] += value; \ 127 else ap[_i] = value; \ 128 goto b_noinsert; \ 129 } \ 130 } \ 131 if (nonew == 1) goto b_noinsert; \ 132 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 133 if (nrow >= rmax) { \ 134 /* there is no extra room in row, therefore enlarge */ \ 135 PetscInt new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 136 PetscScalar *new_a; \ 137 \ 138 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 139 \ 140 /* malloc new storage space */ \ 141 len = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(bm+1)*sizeof(PetscInt); \ 142 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 143 new_j = (PetscInt*)(new_a + new_nz); \ 144 new_i = new_j + new_nz; \ 145 \ 146 /* copy over old data into new slots */ \ 147 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 148 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 149 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \ 150 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 151 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 152 len*sizeof(PetscInt));CHKERRQ(ierr); \ 153 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 154 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 155 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 156 /* free up old matrix storage */ \ 157 \ 158 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 159 if (!b->singlemalloc) { \ 160 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 161 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 162 } \ 163 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 164 b->singlemalloc = PETSC_TRUE; \ 165 \ 166 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 167 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 168 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \ 169 b->maxnz += CHUNKSIZE; \ 170 b->reallocs++; \ 171 } \ 172 N = nrow++ - 1; b->nz++; \ 173 /* shift up all the later entries in this row */ \ 174 for (ii=N; ii>=_i; ii--) { \ 175 rp[ii+1] = rp[ii]; \ 176 ap[ii+1] = ap[ii]; \ 177 } \ 178 rp[_i] = col1; \ 179 ap[_i] = value; \ 180 b_noinsert: ; \ 181 bilen[row] = nrow; \ 182 } 183 184 #undef __FUNCT__ 185 #define __FUNCT__ "MatSetValues_MPIAIJ" 186 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 187 { 188 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 189 PetscScalar value; 190 PetscErrorCode ierr; 191 PetscInt i,j,rstart = aij->rstart,rend = 
aij->rend; 192 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 193 PetscTruth roworiented = aij->roworiented; 194 195 /* Some Variables required in the macro */ 196 Mat A = aij->A; 197 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 198 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 199 PetscScalar *aa = a->a; 200 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 201 Mat B = aij->B; 202 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 203 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 204 PetscScalar *ba = b->a; 205 206 PetscInt *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 207 PetscInt nonew = a->nonew,shift=0; 208 PetscScalar *ap; 209 210 PetscFunctionBegin; 211 for (i=0; i<m; i++) { 212 if (im[i] < 0) continue; 213 #if defined(PETSC_USE_BOPT_g) 214 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 215 #endif 216 if (im[i] >= rstart && im[i] < rend) { 217 row = im[i] - rstart; 218 for (j=0; j<n; j++) { 219 if (in[j] >= cstart && in[j] < cend){ 220 col = in[j] - cstart; 221 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 222 if (ignorezeroentries && value == 0.0) continue; 223 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 224 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 225 } else if (in[j] < 0) continue; 226 #if defined(PETSC_USE_BOPT_g) 227 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);} 228 #endif 229 else { 230 if (mat->was_assembled) { 231 if (!aij->colmap) { 232 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 233 } 234 #if defined (PETSC_USE_CTABLE) 235 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 236 col--; 237 #else 238 col = aij->colmap[in[j]] - 1; 239 #endif 240 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 241 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 242 col = in[j]; 243 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 244 B = aij->B; 245 b = (Mat_SeqAIJ*)B->data; 246 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 247 ba = b->a; 248 } 249 } else col = in[j]; 250 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 251 if (ignorezeroentries && value == 0.0) continue; 252 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 253 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 254 } 255 } 256 } else { 257 if (!aij->donotstash) { 258 if (roworiented) { 259 if (ignorezeroentries && v[i*n] == 0.0) continue; 260 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 261 } else { 262 if (ignorezeroentries && v[i] == 0.0) continue; 263 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 264 } 265 } 266 } 267 } 268 PetscFunctionReturn(0); 269 } 270 271 #undef __FUNCT__ 272 #define __FUNCT__ "MatGetValues_MPIAIJ" 273 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 274 { 275 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 276 PetscErrorCode ierr; 277 PetscInt i,j,rstart = aij->rstart,rend = aij->rend; 278 PetscInt cstart = aij->cstart,cend = aij->cend,row,col; 279 280 PetscFunctionBegin; 281 for (i=0; i<m; i++) { 282 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]); 283 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max 
%d",idxm[i],mat->M-1); 284 if (idxm[i] >= rstart && idxm[i] < rend) { 285 row = idxm[i] - rstart; 286 for (j=0; j<n; j++) { 287 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]); 288 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1); 289 if (idxn[j] >= cstart && idxn[j] < cend){ 290 col = idxn[j] - cstart; 291 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 292 } else { 293 if (!aij->colmap) { 294 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 295 } 296 #if defined (PETSC_USE_CTABLE) 297 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 298 col --; 299 #else 300 col = aij->colmap[idxn[j]] - 1; 301 #endif 302 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 303 else { 304 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 305 } 306 } 307 } 308 } else { 309 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 310 } 311 } 312 PetscFunctionReturn(0); 313 } 314 315 #undef __FUNCT__ 316 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 317 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 318 { 319 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 320 PetscErrorCode ierr; 321 PetscInt nstash,reallocs; 322 InsertMode addv; 323 324 PetscFunctionBegin; 325 if (aij->donotstash) { 326 PetscFunctionReturn(0); 327 } 328 329 /* make sure all processors are either in INSERTMODE or ADDMODE */ 330 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 331 if (addv == (ADD_VALUES|INSERT_VALUES)) { 332 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 333 } 334 mat->insertmode = addv; /* in case this processor had no cache */ 335 336 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 337 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 338 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 339 PetscFunctionReturn(0); 340 } 341 342 343 #undef __FUNCT__ 344 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 345 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 346 { 347 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 348 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 349 PetscErrorCode ierr; 350 PetscMPIInt n; 351 PetscInt i,j,rstart,ncols,flg; 352 PetscInt *row,*col,other_disassembled; 353 PetscScalar *val; 354 InsertMode addv = mat->insertmode; 355 356 PetscFunctionBegin; 357 if (!aij->donotstash) { 358 while (1) { 359 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 360 if (!flg) break; 361 362 for (i=0; i<n;) { 363 /* Now identify the consecutive vals belonging to the same row */ 364 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 365 if (j < n) ncols = j-i; 366 else ncols = n-i; 367 /* Now assemble all these values with a single function call */ 368 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 369 i = j; 370 } 371 } 372 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 373 } 374 375 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 376 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 377 378 /* determine if any processor has disassembled, if so we must 379 also disassemble ourselfs, in order that we may reassemble. 
*/ 380 /* 381 if nonzero structure of submatrix B cannot change then we know that 382 no processor disassembled thus we can skip this stuff 383 */ 384 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 385 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 386 if (mat->was_assembled && !other_disassembled) { 387 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 388 /* reaccess the b because aij->B was changed */ 389 b = (Mat_SeqAIJ *)aij->B->data; 390 } 391 } 392 393 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 394 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 395 } 396 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 397 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 398 399 if (aij->rowvalues) { 400 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 401 aij->rowvalues = 0; 402 } 403 404 /* used by MatAXPY() */ 405 a->xtoy = 0; b->xtoy = 0; 406 a->XtoY = 0; b->XtoY = 0; 407 408 PetscFunctionReturn(0); 409 } 410 411 #undef __FUNCT__ 412 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 413 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 414 { 415 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 416 PetscErrorCode ierr; 417 418 PetscFunctionBegin; 419 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 420 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 421 PetscFunctionReturn(0); 422 } 423 424 #undef __FUNCT__ 425 #define __FUNCT__ "MatZeroRows_MPIAIJ" 426 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag) 427 { 428 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 429 PetscErrorCode ierr; 430 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = A->tag; 431 PetscInt i,N,*rows,*owners = l->rowners; 432 PetscInt *nprocs,j,idx,nsends,row; 433 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 434 PetscInt *rvalues,count,base,slen,*source; 435 PetscInt *lens,*lrows,*values,rstart=l->rstart; 436 MPI_Comm comm = A->comm; 437 MPI_Request *send_waits,*recv_waits; 438 MPI_Status recv_status,*send_status; 439 IS istmp; 440 PetscTruth found; 441 442 PetscFunctionBegin; 443 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 444 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 445 446 /* first count number of contributors to each processor */ 447 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 448 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 449 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 450 for (i=0; i<N; i++) { 451 idx = rows[i]; 452 found = PETSC_FALSE; 453 for (j=0; j<size; j++) { 454 if (idx >= owners[j] && idx < owners[j+1]) { 455 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 456 } 457 } 458 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 459 } 460 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 461 462 /* inform other processors of number of messages and max length*/ 463 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 464 465 /* post receives: */ 466 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 467 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 468 for (i=0; i<nrecvs; i++) { 469 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 470 } 471 472 /* do sends: 473 1) starts[i] gives the starting index in svalues for stuff going to 474 the ith processor 475 */ 476 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 477 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 478 
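  /*
     Pack the rows by destination before sending: starts[] below is a running
     prefix sum of the per-process counts stored in nprocs[2*i], so svalues
     ends up grouped by owning process and each MPI_Isend can ship one
     contiguous block of row indices.
  */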
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory, calling lots of mallocs and slowing
     things down.
536 537 Contributed by: Mathew Knepley 538 */ 539 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 540 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 541 if (diag && (l->A->M == l->A->N)) { 542 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 543 } else if (diag) { 544 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 545 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 546 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 547 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 548 } 549 for (i = 0; i < slen; i++) { 550 row = lrows[i] + rstart; 551 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 552 } 553 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 554 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 555 } else { 556 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 557 } 558 ierr = ISDestroy(istmp);CHKERRQ(ierr); 559 ierr = PetscFree(lrows);CHKERRQ(ierr); 560 561 /* wait on sends */ 562 if (nsends) { 563 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 564 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 565 ierr = PetscFree(send_status);CHKERRQ(ierr); 566 } 567 ierr = PetscFree(send_waits);CHKERRQ(ierr); 568 ierr = PetscFree(svalues);CHKERRQ(ierr); 569 570 PetscFunctionReturn(0); 571 } 572 573 #undef __FUNCT__ 574 #define __FUNCT__ "MatMult_MPIAIJ" 575 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 576 { 577 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 578 PetscErrorCode ierr; 579 PetscInt nt; 580 581 PetscFunctionBegin; 582 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 583 if (nt != A->n) { 584 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt); 585 } 586 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 587 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 588 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 589 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 590 PetscFunctionReturn(0); 591 } 592 593 #undef __FUNCT__ 594 #define __FUNCT__ "MatMultAdd_MPIAIJ" 595 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 596 { 597 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 598 PetscErrorCode ierr; 599 600 PetscFunctionBegin; 601 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 602 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 603 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 604 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 605 PetscFunctionReturn(0); 606 } 607 608 #undef __FUNCT__ 609 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 610 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 611 { 612 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 613 PetscErrorCode ierr; 614 615 PetscFunctionBegin; 616 /* do nondiagonal part */ 617 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 618 /* send it on its way */ 619 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 620 /* do local part */ 621 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 622 /* receive remote parts: note this assumes the values are not actually */ 623 /* inserted in yy until the next line, which is true for my implementation*/ 624 /* but is not perhaps always true. 
*/ 625 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 626 PetscFunctionReturn(0); 627 } 628 629 EXTERN_C_BEGIN 630 #undef __FUNCT__ 631 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 632 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth tol,PetscTruth *f) 633 { 634 MPI_Comm comm; 635 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 636 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 637 IS Me,Notme; 638 PetscErrorCode ierr; 639 PetscInt M,N,first,last,*notme,i; 640 PetscMPIInt size; 641 642 PetscFunctionBegin; 643 644 /* Easy test: symmetric diagonal block */ 645 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 646 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 647 if (!*f) PetscFunctionReturn(0); 648 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 649 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 650 if (size == 1) PetscFunctionReturn(0); 651 652 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 653 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 654 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 655 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 656 for (i=0; i<first; i++) notme[i] = i; 657 for (i=last; i<M; i++) notme[i-last+first] = i; 658 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 659 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 660 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 661 Aoff = Aoffs[0]; 662 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 663 Boff = Boffs[0]; 664 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 665 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 666 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 667 ierr = ISDestroy(Me);CHKERRQ(ierr); 668 ierr = ISDestroy(Notme);CHKERRQ(ierr); 669 670 PetscFunctionReturn(0); 671 } 672 EXTERN_C_END 673 674 #undef __FUNCT__ 675 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 676 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 677 { 678 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 679 PetscErrorCode ierr; 680 681 PetscFunctionBegin; 682 /* do nondiagonal part */ 683 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 684 /* send it on its way */ 685 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 686 /* do local part */ 687 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 688 /* receive remote parts: note this assumes the values are not actually */ 689 /* inserted in yy until the next line, which is true for my implementation*/ 690 /* but is not perhaps always true. 
*/ 691 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 692 PetscFunctionReturn(0); 693 } 694 695 /* 696 This only works correctly for square matrices where the subblock A->A is the 697 diagonal block 698 */ 699 #undef __FUNCT__ 700 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 701 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 702 { 703 PetscErrorCode ierr; 704 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 705 706 PetscFunctionBegin; 707 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 708 if (a->rstart != a->cstart || a->rend != a->cend) { 709 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 710 } 711 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 712 PetscFunctionReturn(0); 713 } 714 715 #undef __FUNCT__ 716 #define __FUNCT__ "MatScale_MPIAIJ" 717 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A) 718 { 719 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 720 PetscErrorCode ierr; 721 722 PetscFunctionBegin; 723 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 724 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 725 PetscFunctionReturn(0); 726 } 727 728 #undef __FUNCT__ 729 #define __FUNCT__ "MatDestroy_MPIAIJ" 730 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 731 { 732 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 733 PetscErrorCode ierr; 734 735 PetscFunctionBegin; 736 #if defined(PETSC_USE_LOG) 737 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 738 #endif 739 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 740 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 741 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 742 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 743 #if defined (PETSC_USE_CTABLE) 744 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 745 #else 746 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 747 #endif 748 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 749 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 750 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 751 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 752 ierr = PetscFree(aij);CHKERRQ(ierr); 753 754 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 755 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 756 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 757 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 758 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 759 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 760 PetscFunctionReturn(0); 761 } 762 763 #undef __FUNCT__ 764 #define __FUNCT__ "MatView_MPIAIJ_Binary" 765 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 766 { 767 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 768 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 769 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 770 PetscErrorCode ierr; 771 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 772 int fd; 773 PetscInt nz,header[4],*row_lengths,*range,rlen,i; 774 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 775 PetscScalar *column_values; 776 777 PetscFunctionBegin; 778 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 779 ierr = 
MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 780 nz = A->nz + B->nz; 781 if (!rank) { 782 header[0] = MAT_FILE_COOKIE; 783 header[1] = mat->M; 784 header[2] = mat->N; 785 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 786 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 787 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 788 /* get largest number of rows any processor has */ 789 rlen = mat->m; 790 ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr); 791 for (i=1; i<size; i++) { 792 rlen = PetscMax(rlen,range[i+1] - range[i]); 793 } 794 } else { 795 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 796 rlen = mat->m; 797 } 798 799 /* load up the local row counts */ 800 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 801 for (i=0; i<mat->m; i++) { 802 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 803 } 804 805 /* store the row lengths to the file */ 806 if (!rank) { 807 MPI_Status status; 808 ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 809 for (i=1; i<size; i++) { 810 rlen = range[i+1] - range[i]; 811 ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 812 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 813 } 814 } else { 815 ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 816 } 817 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 818 819 /* load up the local column indices */ 820 nzmax = nz; /* )th processor needs space a largest processor needs */ 821 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 822 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 823 cnt = 0; 824 for (i=0; i<mat->m; i++) { 825 for (j=B->i[i]; j<B->i[i+1]; j++) { 826 if ( (col = garray[B->j[j]]) > cstart) break; 827 column_indices[cnt++] = col; 828 } 829 for (k=A->i[i]; k<A->i[i+1]; k++) { 830 column_indices[cnt++] = A->j[k] + cstart; 831 } 832 for (; j<B->i[i+1]; j++) { 833 column_indices[cnt++] = garray[B->j[j]]; 834 } 835 } 836 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 837 838 /* store the column indices to the file */ 839 if (!rank) { 840 MPI_Status status; 841 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 842 for (i=1; i<size; i++) { 843 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 844 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 845 ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 846 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 847 } 848 } else { 849 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 850 ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 851 } 852 ierr = PetscFree(column_indices);CHKERRQ(ierr); 853 854 /* load up the local column values */ 855 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 856 cnt = 0; 857 for (i=0; i<mat->m; i++) { 858 for (j=B->i[i]; j<B->i[i+1]; j++) { 859 if ( garray[B->j[j]] > cstart) break; 860 column_values[cnt++] = B->a[j]; 861 } 862 for (k=A->i[i]; k<A->i[i+1]; k++) { 863 column_values[cnt++] = A->a[k]; 864 } 865 for (; j<B->i[i+1]; j++) { 866 column_values[cnt++] = B->a[j]; 867 } 868 } 869 if (cnt != A->nz + B->nz) 
SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 870 871 /* store the column values to the file */ 872 if (!rank) { 873 MPI_Status status; 874 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 875 for (i=1; i<size; i++) { 876 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 877 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 878 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 879 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 880 } 881 } else { 882 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr); 883 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 884 } 885 ierr = PetscFree(column_values);CHKERRQ(ierr); 886 PetscFunctionReturn(0); 887 } 888 889 #undef __FUNCT__ 890 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 891 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 892 { 893 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 894 PetscErrorCode ierr; 895 PetscMPIInt rank = aij->rank,size = aij->size; 896 PetscTruth isdraw,iascii,flg,isbinary; 897 PetscViewer sviewer; 898 PetscViewerFormat format; 899 900 PetscFunctionBegin; 901 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 902 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 903 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 904 if (iascii) { 905 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 906 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 907 MatInfo info; 908 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 909 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 910 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 911 if (flg) { 912 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 913 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 914 } else { 915 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 916 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 917 } 918 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 919 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 920 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 921 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 922 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 923 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 924 PetscFunctionReturn(0); 925 } else if (format == PETSC_VIEWER_ASCII_INFO) { 926 PetscFunctionReturn(0); 927 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 928 PetscFunctionReturn(0); 929 } 930 } else if (isbinary) { 931 if (size == 1) { 932 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 933 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 934 } else { 935 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 936 } 937 PetscFunctionReturn(0); 938 } else if (isdraw) { 939 PetscDraw draw; 940 PetscTruth isnull; 941 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 942 ierr = 
PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 943 } 944 945 if (size == 1) { 946 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 947 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 948 } else { 949 /* assemble the entire matrix onto first processor. */ 950 Mat A; 951 Mat_SeqAIJ *Aloc; 952 PetscInt M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 953 PetscScalar *a; 954 955 if (!rank) { 956 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 957 } else { 958 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 959 } 960 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 961 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 962 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 963 PetscLogObjectParent(mat,A); 964 965 /* copy over the A part */ 966 Aloc = (Mat_SeqAIJ*)aij->A->data; 967 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 968 row = aij->rstart; 969 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 970 for (i=0; i<m; i++) { 971 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 972 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 973 } 974 aj = Aloc->j; 975 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 976 977 /* copy over the B part */ 978 Aloc = (Mat_SeqAIJ*)aij->B->data; 979 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 980 row = aij->rstart; 981 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 982 ct = cols; 983 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 984 for (i=0; i<m; i++) { 985 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 986 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 987 } 988 ierr = PetscFree(ct);CHKERRQ(ierr); 989 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 990 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 991 /* 992 Everyone has to call to draw the matrix since the graphics waits are 993 synchronized across all processors that share the PetscDraw object 994 */ 995 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 996 if (!rank) { 997 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 998 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 999 } 1000 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1001 ierr = MatDestroy(A);CHKERRQ(ierr); 1002 } 1003 PetscFunctionReturn(0); 1004 } 1005 1006 #undef __FUNCT__ 1007 #define __FUNCT__ "MatView_MPIAIJ" 1008 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1009 { 1010 PetscErrorCode ierr; 1011 PetscTruth iascii,isdraw,issocket,isbinary; 1012 1013 PetscFunctionBegin; 1014 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1015 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1016 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1017 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1018 if (iascii || isdraw || isbinary || issocket) { 1019 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1020 } else { 1021 SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1022 } 1023 PetscFunctionReturn(0); 1024 } 1025 1026 1027 1028 #undef __FUNCT__ 1029 #define __FUNCT__ "MatRelax_MPIAIJ" 1030 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType 
flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1031 { 1032 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1033 PetscErrorCode ierr; 1034 Vec bb1; 1035 PetscScalar mone=-1.0; 1036 1037 PetscFunctionBegin; 1038 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1039 1040 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1041 1042 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1043 if (flag & SOR_ZERO_INITIAL_GUESS) { 1044 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1045 its--; 1046 } 1047 1048 while (its--) { 1049 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1050 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1051 1052 /* update rhs: bb1 = bb - B*x */ 1053 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1054 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1055 1056 /* local sweep */ 1057 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1058 CHKERRQ(ierr); 1059 } 1060 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1061 if (flag & SOR_ZERO_INITIAL_GUESS) { 1062 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1063 its--; 1064 } 1065 while (its--) { 1066 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1067 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1068 1069 /* update rhs: bb1 = bb - B*x */ 1070 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1071 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1072 1073 /* local sweep */ 1074 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1075 CHKERRQ(ierr); 1076 } 1077 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1078 if (flag & SOR_ZERO_INITIAL_GUESS) { 1079 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1080 its--; 1081 } 1082 while (its--) { 1083 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1084 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1085 1086 /* update rhs: bb1 = bb - B*x */ 1087 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1088 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1089 1090 /* local sweep */ 1091 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1092 CHKERRQ(ierr); 1093 } 1094 } else { 1095 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1096 } 1097 1098 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1099 PetscFunctionReturn(0); 1100 } 1101 1102 #undef __FUNCT__ 1103 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1104 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1105 { 1106 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1107 Mat A = mat->A,B = mat->B; 1108 PetscErrorCode ierr; 1109 PetscReal isend[5],irecv[5]; 1110 1111 PetscFunctionBegin; 1112 info->block_size = 1.0; 1113 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1114 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1115 isend[3] = info->memory; isend[4] = info->mallocs; 1116 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1117 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1118 
isend[3] += info->memory; isend[4] += info->mallocs; 1119 if (flag == MAT_LOCAL) { 1120 info->nz_used = isend[0]; 1121 info->nz_allocated = isend[1]; 1122 info->nz_unneeded = isend[2]; 1123 info->memory = isend[3]; 1124 info->mallocs = isend[4]; 1125 } else if (flag == MAT_GLOBAL_MAX) { 1126 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1127 info->nz_used = irecv[0]; 1128 info->nz_allocated = irecv[1]; 1129 info->nz_unneeded = irecv[2]; 1130 info->memory = irecv[3]; 1131 info->mallocs = irecv[4]; 1132 } else if (flag == MAT_GLOBAL_SUM) { 1133 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1134 info->nz_used = irecv[0]; 1135 info->nz_allocated = irecv[1]; 1136 info->nz_unneeded = irecv[2]; 1137 info->memory = irecv[3]; 1138 info->mallocs = irecv[4]; 1139 } 1140 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1141 info->fill_ratio_needed = 0; 1142 info->factor_mallocs = 0; 1143 info->rows_global = (double)matin->M; 1144 info->columns_global = (double)matin->N; 1145 info->rows_local = (double)matin->m; 1146 info->columns_local = (double)matin->N; 1147 1148 PetscFunctionReturn(0); 1149 } 1150 1151 #undef __FUNCT__ 1152 #define __FUNCT__ "MatSetOption_MPIAIJ" 1153 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op) 1154 { 1155 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1156 PetscErrorCode ierr; 1157 1158 PetscFunctionBegin; 1159 switch (op) { 1160 case MAT_NO_NEW_NONZERO_LOCATIONS: 1161 case MAT_YES_NEW_NONZERO_LOCATIONS: 1162 case MAT_COLUMNS_UNSORTED: 1163 case MAT_COLUMNS_SORTED: 1164 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1165 case MAT_KEEP_ZEROED_ROWS: 1166 case MAT_NEW_NONZERO_LOCATION_ERR: 1167 case MAT_USE_INODES: 1168 case MAT_DO_NOT_USE_INODES: 1169 case MAT_IGNORE_ZERO_ENTRIES: 1170 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1171 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1172 break; 1173 case MAT_ROW_ORIENTED: 1174 a->roworiented = PETSC_TRUE; 1175 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1176 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1177 break; 1178 case MAT_ROWS_SORTED: 1179 case MAT_ROWS_UNSORTED: 1180 case MAT_YES_NEW_DIAGONALS: 1181 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1182 break; 1183 case MAT_COLUMN_ORIENTED: 1184 a->roworiented = PETSC_FALSE; 1185 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1186 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1187 break; 1188 case MAT_IGNORE_OFF_PROC_ENTRIES: 1189 a->donotstash = PETSC_TRUE; 1190 break; 1191 case MAT_NO_NEW_DIAGONALS: 1192 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1193 case MAT_SYMMETRIC: 1194 case MAT_STRUCTURALLY_SYMMETRIC: 1195 case MAT_HERMITIAN: 1196 case MAT_SYMMETRY_ETERNAL: 1197 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1198 break; 1199 case MAT_NOT_SYMMETRIC: 1200 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1201 case MAT_NOT_HERMITIAN: 1202 case MAT_NOT_SYMMETRY_ETERNAL: 1203 break; 1204 default: 1205 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1206 } 1207 PetscFunctionReturn(0); 1208 } 1209 1210 #undef __FUNCT__ 1211 #define __FUNCT__ "MatGetRow_MPIAIJ" 1212 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1213 { 1214 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1215 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1216 PetscErrorCode ierr; 1217 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1218 PetscInt nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1219 PetscInt *cmap,*idx_p; 1220 1221 PetscFunctionBegin; 1222 if (mat->getrowactive == 
PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1223 mat->getrowactive = PETSC_TRUE; 1224 1225 if (!mat->rowvalues && (idx || v)) { 1226 /* 1227 allocate enough space to hold information from the longest row. 1228 */ 1229 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1230 PetscInt max = 1,tmp; 1231 for (i=0; i<matin->m; i++) { 1232 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1233 if (max < tmp) { max = tmp; } 1234 } 1235 ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1236 mat->rowindices = (PetscInt*)(mat->rowvalues + max); 1237 } 1238 1239 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1240 lrow = row - rstart; 1241 1242 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1243 if (!v) {pvA = 0; pvB = 0;} 1244 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1245 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1246 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1247 nztot = nzA + nzB; 1248 1249 cmap = mat->garray; 1250 if (v || idx) { 1251 if (nztot) { 1252 /* Sort by increasing column numbers, assuming A and B already sorted */ 1253 PetscInt imark = -1; 1254 if (v) { 1255 *v = v_p = mat->rowvalues; 1256 for (i=0; i<nzB; i++) { 1257 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1258 else break; 1259 } 1260 imark = i; 1261 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1262 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1263 } 1264 if (idx) { 1265 *idx = idx_p = mat->rowindices; 1266 if (imark > -1) { 1267 for (i=0; i<imark; i++) { 1268 idx_p[i] = cmap[cworkB[i]]; 1269 } 1270 } else { 1271 for (i=0; i<nzB; i++) { 1272 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1273 else break; 1274 } 1275 imark = i; 1276 } 1277 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1278 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1279 } 1280 } else { 1281 if (idx) *idx = 0; 1282 if (v) *v = 0; 1283 } 1284 } 1285 *nz = nztot; 1286 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1287 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1288 PetscFunctionReturn(0); 1289 } 1290 1291 #undef __FUNCT__ 1292 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1293 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1294 { 1295 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1296 1297 PetscFunctionBegin; 1298 if (aij->getrowactive == PETSC_FALSE) { 1299 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1300 } 1301 aij->getrowactive = PETSC_FALSE; 1302 PetscFunctionReturn(0); 1303 } 1304 1305 #undef __FUNCT__ 1306 #define __FUNCT__ "MatNorm_MPIAIJ" 1307 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1308 { 1309 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1310 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1311 PetscErrorCode ierr; 1312 PetscInt i,j,cstart = aij->cstart; 1313 PetscReal sum = 0.0; 1314 PetscScalar *v; 1315 1316 PetscFunctionBegin; 1317 if (aij->size == 1) { 1318 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1319 } else { 1320 if (type == NORM_FROBENIUS) { 1321 v = amat->a; 1322 for (i=0; i<amat->nz; i++) { 1323 #if defined(PETSC_USE_COMPLEX) 1324 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1325 #else 1326 sum += (*v)*(*v); v++; 1327 #endif 1328 } 1329 v = bmat->a; 1330 for (i=0; i<bmat->nz; i++) { 1331 #if defined(PETSC_USE_COMPLEX) 
1332 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1333 #else 1334 sum += (*v)*(*v); v++; 1335 #endif 1336 } 1337 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1338 *norm = sqrt(*norm); 1339 } else if (type == NORM_1) { /* max column norm */ 1340 PetscReal *tmp,*tmp2; 1341 PetscInt *jj,*garray = aij->garray; 1342 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1343 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1344 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1345 *norm = 0.0; 1346 v = amat->a; jj = amat->j; 1347 for (j=0; j<amat->nz; j++) { 1348 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1349 } 1350 v = bmat->a; jj = bmat->j; 1351 for (j=0; j<bmat->nz; j++) { 1352 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1353 } 1354 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1355 for (j=0; j<mat->N; j++) { 1356 if (tmp2[j] > *norm) *norm = tmp2[j]; 1357 } 1358 ierr = PetscFree(tmp);CHKERRQ(ierr); 1359 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1360 } else if (type == NORM_INFINITY) { /* max row norm */ 1361 PetscReal ntemp = 0.0; 1362 for (j=0; j<aij->A->m; j++) { 1363 v = amat->a + amat->i[j]; 1364 sum = 0.0; 1365 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1366 sum += PetscAbsScalar(*v); v++; 1367 } 1368 v = bmat->a + bmat->i[j]; 1369 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1370 sum += PetscAbsScalar(*v); v++; 1371 } 1372 if (sum > ntemp) ntemp = sum; 1373 } 1374 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1375 } else { 1376 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1377 } 1378 } 1379 PetscFunctionReturn(0); 1380 } 1381 1382 #undef __FUNCT__ 1383 #define __FUNCT__ "MatTranspose_MPIAIJ" 1384 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1385 { 1386 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1387 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1388 PetscErrorCode ierr; 1389 PetscInt M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1390 Mat B; 1391 PetscScalar *array; 1392 1393 PetscFunctionBegin; 1394 if (!matout && M != N) { 1395 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1396 } 1397 1398 ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr); 1399 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1400 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1401 1402 /* copy over the A part */ 1403 Aloc = (Mat_SeqAIJ*)a->A->data; 1404 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1405 row = a->rstart; 1406 for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;} 1407 for (i=0; i<m; i++) { 1408 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1409 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1410 } 1411 aj = Aloc->j; 1412 for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;} 1413 1414 /* copy over the B part */ 1415 Aloc = (Mat_SeqAIJ*)a->B->data; 1416 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1417 row = a->rstart; 1418 ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1419 ct = cols; 1420 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1421 for (i=0; i<m; i++) { 1422 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1423 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1424 } 1425 ierr = PetscFree(ct);CHKERRQ(ierr); 1426 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1427 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1428 if (matout) { 1429 
*matout = B; 1430 } else { 1431 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1432 } 1433 PetscFunctionReturn(0); 1434 } 1435 1436 #undef __FUNCT__ 1437 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1438 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1439 { 1440 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1441 Mat a = aij->A,b = aij->B; 1442 PetscErrorCode ierr; 1443 PetscInt s1,s2,s3; 1444 1445 PetscFunctionBegin; 1446 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1447 if (rr) { 1448 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1449 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1450 /* Overlap communication with computation. */ 1451 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1452 } 1453 if (ll) { 1454 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1455 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1456 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1457 } 1458 /* scale the diagonal block */ 1459 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1460 1461 if (rr) { 1462 /* Do a scatter end and then right scale the off-diagonal block */ 1463 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1464 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1465 } 1466 1467 PetscFunctionReturn(0); 1468 } 1469 1470 1471 #undef __FUNCT__ 1472 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1473 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A) 1474 { 1475 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1476 PetscErrorCode ierr; 1477 1478 PetscFunctionBegin; 1479 if (!a->rank) { 1480 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1481 } 1482 PetscFunctionReturn(0); 1483 } 1484 1485 #undef __FUNCT__ 1486 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1487 PetscErrorCode MatGetBlockSize_MPIAIJ(Mat A,PetscInt *bs) 1488 { 1489 PetscFunctionBegin; 1490 *bs = 1; 1491 PetscFunctionReturn(0); 1492 } 1493 #undef __FUNCT__ 1494 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1495 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1496 { 1497 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1498 PetscErrorCode ierr; 1499 1500 PetscFunctionBegin; 1501 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1502 PetscFunctionReturn(0); 1503 } 1504 1505 #undef __FUNCT__ 1506 #define __FUNCT__ "MatEqual_MPIAIJ" 1507 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1508 { 1509 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1510 Mat a,b,c,d; 1511 PetscTruth flg; 1512 PetscErrorCode ierr; 1513 1514 PetscFunctionBegin; 1515 a = matA->A; b = matA->B; 1516 c = matB->A; d = matB->B; 1517 1518 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1519 if (flg == PETSC_TRUE) { 1520 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1521 } 1522 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1523 PetscFunctionReturn(0); 1524 } 1525 1526 #undef __FUNCT__ 1527 #define __FUNCT__ "MatCopy_MPIAIJ" 1528 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1529 { 1530 PetscErrorCode ierr; 1531 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1532 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1533 1534 PetscFunctionBegin; 1535 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 1536 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1537 /* because of the column compression in the off-processor part of the matrix a->B, 1538 the number of columns in a->B and b->B may be different, hence we cannot call 1539 the MatCopy() directly on the two parts. If need be, we can provide a more 1540 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1541 then copying the submatrices */ 1542 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1543 } else { 1544 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1545 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1546 } 1547 PetscFunctionReturn(0); 1548 } 1549 1550 #undef __FUNCT__ 1551 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1552 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1553 { 1554 PetscErrorCode ierr; 1555 1556 PetscFunctionBegin; 1557 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1558 PetscFunctionReturn(0); 1559 } 1560 1561 #include "petscblaslapack.h" 1562 #undef __FUNCT__ 1563 #define __FUNCT__ "MatAXPY_MPIAIJ" 1564 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str) 1565 { 1566 PetscErrorCode ierr; 1567 PetscInt i; 1568 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1569 PetscBLASInt bnz,one=1; 1570 Mat_SeqAIJ *x,*y; 1571 1572 PetscFunctionBegin; 1573 if (str == SAME_NONZERO_PATTERN) { 1574 x = (Mat_SeqAIJ *)xx->A->data; 1575 y = (Mat_SeqAIJ *)yy->A->data; 1576 bnz = (PetscBLASInt)x->nz; 1577 BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1578 x = (Mat_SeqAIJ *)xx->B->data; 1579 y = (Mat_SeqAIJ *)yy->B->data; 1580 bnz = (PetscBLASInt)x->nz; 1581 BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1582 } else if (str == SUBSET_NONZERO_PATTERN) { 1583 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1584 1585 x = (Mat_SeqAIJ *)xx->B->data; 1586 y = (Mat_SeqAIJ *)yy->B->data; 1587 if (y->xtoy && y->XtoY != xx->B) { 1588 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1589 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1590 } 1591 if (!y->xtoy) { /* get xtoy */ 1592 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1593 y->XtoY = xx->B; 1594 } 1595 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1596 } else { 1597 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1598 } 1599 PetscFunctionReturn(0); 1600 } 1601 1602 /* -------------------------------------------------------------------*/ 1603 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1604 MatGetRow_MPIAIJ, 1605 MatRestoreRow_MPIAIJ, 1606 MatMult_MPIAIJ, 1607 /* 4*/ MatMultAdd_MPIAIJ, 1608 MatMultTranspose_MPIAIJ, 1609 MatMultTransposeAdd_MPIAIJ, 1610 0, 1611 0, 1612 0, 1613 /*10*/ 0, 1614 0, 1615 0, 1616 MatRelax_MPIAIJ, 1617 MatTranspose_MPIAIJ, 1618 /*15*/ MatGetInfo_MPIAIJ, 1619 MatEqual_MPIAIJ, 1620 MatGetDiagonal_MPIAIJ, 1621 MatDiagonalScale_MPIAIJ, 1622 MatNorm_MPIAIJ, 1623 /*20*/ MatAssemblyBegin_MPIAIJ, 1624 MatAssemblyEnd_MPIAIJ, 1625 0, 1626 MatSetOption_MPIAIJ, 1627 MatZeroEntries_MPIAIJ, 1628 /*25*/ MatZeroRows_MPIAIJ, 1629 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1630 MatLUFactorSymbolic_MPIAIJ_TFS, 1631 #else 1632 0, 1633 #endif 1634 0, 1635 0, 1636 0, 1637 /*30*/ MatSetUpPreallocation_MPIAIJ, 1638 0, 1639 0, 1640 0, 1641 0, 1642 /*35*/ MatDuplicate_MPIAIJ, 1643 0, 1644 0, 1645 0, 1646 0, 1647 /*40*/ MatAXPY_MPIAIJ, 1648 MatGetSubMatrices_MPIAIJ, 1649 MatIncreaseOverlap_MPIAIJ, 1650 MatGetValues_MPIAIJ, 1651 
MatCopy_MPIAIJ, 1652 /*45*/ MatPrintHelp_MPIAIJ, 1653 MatScale_MPIAIJ, 1654 0, 1655 0, 1656 0, 1657 /*50*/ MatGetBlockSize_MPIAIJ, 1658 0, 1659 0, 1660 0, 1661 0, 1662 /*55*/ MatFDColoringCreate_MPIAIJ, 1663 0, 1664 MatSetUnfactored_MPIAIJ, 1665 0, 1666 0, 1667 /*60*/ MatGetSubMatrix_MPIAIJ, 1668 MatDestroy_MPIAIJ, 1669 MatView_MPIAIJ, 1670 MatGetPetscMaps_Petsc, 1671 0, 1672 /*65*/ 0, 1673 0, 1674 0, 1675 0, 1676 0, 1677 /*70*/ 0, 1678 0, 1679 MatSetColoring_MPIAIJ, 1680 MatSetValuesAdic_MPIAIJ, 1681 MatSetValuesAdifor_MPIAIJ, 1682 /*75*/ 0, 1683 0, 1684 0, 1685 0, 1686 0, 1687 /*80*/ 0, 1688 0, 1689 0, 1690 0, 1691 /*84*/ MatLoad_MPIAIJ, 1692 0, 1693 0, 1694 0, 1695 0, 1696 0, 1697 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1698 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1699 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1700 MatPtAP_MPIAIJ_MPIAIJ, 1701 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1702 /*95*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 1703 0, 1704 0, 1705 0}; 1706 1707 /* ----------------------------------------------------------------------------------------*/ 1708 1709 EXTERN_C_BEGIN 1710 #undef __FUNCT__ 1711 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1712 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 1713 { 1714 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1715 PetscErrorCode ierr; 1716 1717 PetscFunctionBegin; 1718 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1719 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1720 PetscFunctionReturn(0); 1721 } 1722 EXTERN_C_END 1723 1724 EXTERN_C_BEGIN 1725 #undef __FUNCT__ 1726 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1727 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 1728 { 1729 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1730 PetscErrorCode ierr; 1731 1732 PetscFunctionBegin; 1733 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1734 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1735 PetscFunctionReturn(0); 1736 } 1737 EXTERN_C_END 1738 1739 #include "petscpc.h" 1740 EXTERN_C_BEGIN 1741 #undef __FUNCT__ 1742 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1743 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1744 { 1745 Mat_MPIAIJ *b; 1746 PetscErrorCode ierr; 1747 PetscInt i; 1748 1749 PetscFunctionBegin; 1750 B->preallocated = PETSC_TRUE; 1751 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1752 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1753 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1754 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1755 if (d_nnz) { 1756 for (i=0; i<B->m; i++) { 1757 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 1758 } 1759 } 1760 if (o_nnz) { 1761 for (i=0; i<B->m; i++) { 1762 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 1763 } 1764 } 1765 b = (Mat_MPIAIJ*)B->data; 1766 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1767 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1768 1769 PetscFunctionReturn(0); 1770 } 1771 EXTERN_C_END 1772 1773 /*MC 1774 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 1775 1776 Options Database Keys: 1777 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 1778 1779 Level: beginner 1780 1781 .seealso: MatCreateMPIAIJ 1782 M*/ 1783 1784 EXTERN_C_BEGIN 1785 #undef __FUNCT__ 1786 #define __FUNCT__ "MatCreate_MPIAIJ" 1787 PetscErrorCode MatCreate_MPIAIJ(Mat B) 1788 { 1789 Mat_MPIAIJ *b; 1790 PetscErrorCode ierr; 1791 PetscInt i; 1792 PetscMPIInt size; 1793 1794 PetscFunctionBegin; 1795 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1796 1797 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1798 B->data = (void*)b; 1799 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1800 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1801 B->factor = 0; 1802 B->assembled = PETSC_FALSE; 1803 B->mapping = 0; 1804 1805 B->insertmode = NOT_SET_VALUES; 1806 b->size = size; 1807 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1808 1809 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1810 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1811 1812 /* the information in the maps duplicates the information computed below, eventually 1813 we should remove the duplicate information that is not contained in the maps */ 1814 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1815 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1816 1817 /* build local table of row and column ownerships */ 1818 ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr); 1819 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1820 b->cowners = b->rowners + b->size + 2; 1821 ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 1822 b->rowners[0] = 0; 1823 for (i=2; i<=b->size; i++) { 1824 b->rowners[i] += b->rowners[i-1]; 1825 } 1826 b->rstart = b->rowners[b->rank]; 1827 b->rend = b->rowners[b->rank+1]; 1828 ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr); 1829 b->cowners[0] = 0; 1830 for (i=2; i<=b->size; i++) { 1831 b->cowners[i] += b->cowners[i-1]; 1832 } 1833 b->cstart = b->cowners[b->rank]; 1834 b->cend = b->cowners[b->rank+1]; 1835 1836 /* build cache for off array entries formed */ 1837 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1838 b->donotstash = PETSC_FALSE; 1839 b->colmap = 0; 1840 b->garray = 0; 1841 b->roworiented = PETSC_TRUE; 1842 1843 /* stuff used for matrix vector multiply */ 1844 b->lvec = PETSC_NULL; 1845 b->Mvctx = PETSC_NULL; 1846 1847 /* stuff for MatGetRow() */ 1848 b->rowindices = 0; 1849 b->rowvalues = 0; 1850 b->getrowactive = PETSC_FALSE; 1851 1852 /* Explicitly create 2 MATSEQAIJ matrices. 
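The first (b->A) is the m x n diagonal block; the second (b->B) holds the off-diagonal entries and is created with the full global column count (m x N), its columns being compressed to just the referenced ghost columns (recorded in garray) when the matrix is assembled.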
*/ 1853 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 1854 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 1855 PetscLogObjectParent(B,b->A); 1856 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 1857 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 1858 PetscLogObjectParent(B,b->B); 1859 1860 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1861 "MatStoreValues_MPIAIJ", 1862 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1863 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1864 "MatRetrieveValues_MPIAIJ", 1865 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1866 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1867 "MatGetDiagonalBlock_MPIAIJ", 1868 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1869 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 1870 "MatIsTranspose_MPIAIJ", 1871 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 1872 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 1873 "MatMPIAIJSetPreallocation_MPIAIJ", 1874 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 1875 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 1876 "MatDiagonalScaleLocal_MPIAIJ", 1877 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 1878 PetscFunctionReturn(0); 1879 } 1880 EXTERN_C_END 1881 1882 /*MC 1883 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 1884 1885 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 1886 and MATMPIAIJ otherwise. As a result, for single process communicators, 1887 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 1888 for communicators controlling multiple processes. It is recommended that you call both of 1889 the above preallocation routines for simplicity. 1890 1891 Options Database Keys: 1892 . 
-mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 1893 1894 Level: beginner 1895 1896 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ 1897 M*/ 1898 1899 EXTERN_C_BEGIN 1900 #undef __FUNCT__ 1901 #define __FUNCT__ "MatCreate_AIJ" 1902 PetscErrorCode MatCreate_AIJ(Mat A) 1903 { 1904 PetscErrorCode ierr; 1905 PetscMPIInt size; 1906 1907 PetscFunctionBegin; 1908 ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr); 1909 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1910 if (size == 1) { 1911 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 1912 } else { 1913 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1914 } 1915 PetscFunctionReturn(0); 1916 } 1917 EXTERN_C_END 1918 1919 #undef __FUNCT__ 1920 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1921 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1922 { 1923 Mat mat; 1924 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1925 PetscErrorCode ierr; 1926 1927 PetscFunctionBegin; 1928 *newmat = 0; 1929 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1930 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1931 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1932 a = (Mat_MPIAIJ*)mat->data; 1933 1934 mat->factor = matin->factor; 1935 mat->assembled = PETSC_TRUE; 1936 mat->insertmode = NOT_SET_VALUES; 1937 mat->preallocated = PETSC_TRUE; 1938 1939 a->rstart = oldmat->rstart; 1940 a->rend = oldmat->rend; 1941 a->cstart = oldmat->cstart; 1942 a->cend = oldmat->cend; 1943 a->size = oldmat->size; 1944 a->rank = oldmat->rank; 1945 a->donotstash = oldmat->donotstash; 1946 a->roworiented = oldmat->roworiented; 1947 a->rowindices = 0; 1948 a->rowvalues = 0; 1949 a->getrowactive = PETSC_FALSE; 1950 1951 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 1952 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1953 if (oldmat->colmap) { 1954 #if defined (PETSC_USE_CTABLE) 1955 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1956 #else 1957 ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1958 PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt)); 1959 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1960 #endif 1961 } else a->colmap = 0; 1962 if (oldmat->garray) { 1963 PetscInt len; 1964 len = oldmat->B->n; 1965 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1966 PetscLogObjectMemory(mat,len*sizeof(PetscInt)); 1967 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 1968 } else a->garray = 0; 1969 1970 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1971 PetscLogObjectParent(mat,a->lvec); 1972 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1973 PetscLogObjectParent(mat,a->Mvctx); 1974 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1975 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1976 PetscLogObjectParent(mat,a->A); 1977 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1978 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1979 PetscLogObjectParent(mat,a->B); 1980 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1981 *newmat = mat; 1982 PetscFunctionReturn(0); 1983 } 1984 1985 #include "petscsys.h" 1986 1987 #undef __FUNCT__ 1988 #define __FUNCT__ "MatLoad_MPIAIJ" 1989 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 1990 { 
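  /*
     Binary load of an MPIAIJ matrix (summary of the code below): process 0 reads the
     header, broadcasts the global sizes, reads the row lengths and scatters them, then
     reads the column indices and the numerical values one process at a time and ships
     each piece to its owner; every process splits its row lengths into diagonal and
     off-diagonal counts, preallocates, and inserts its own rows with MatSetValues().
  */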
1991 Mat A; 1992 PetscScalar *vals,*svals; 1993 MPI_Comm comm = ((PetscObject)viewer)->comm; 1994 MPI_Status status; 1995 PetscErrorCode ierr; 1996 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*rowners,maxnz; 1997 PetscInt i,nz,j,rstart,rend; 1998 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 1999 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 2000 PetscInt cend,cstart,n; 2001 int fd; 2002 2003 PetscFunctionBegin; 2004 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2005 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2006 if (!rank) { 2007 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2008 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2009 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2010 if (header[3] < 0) { 2011 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 2012 } 2013 } 2014 2015 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2016 M = header[1]; N = header[2]; 2017 /* determine ownership of all rows */ 2018 m = M/size + ((M % size) > rank); 2019 ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 2020 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2021 rowners[0] = 0; 2022 for (i=2; i<=size; i++) { 2023 rowners[i] += rowners[i-1]; 2024 } 2025 rstart = rowners[rank]; 2026 rend = rowners[rank+1]; 2027 2028 /* distribute row lengths to all processors */ 2029 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr); 2030 offlens = ourlens + (rend-rstart); 2031 if (!rank) { 2032 ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 2033 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2034 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 2035 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 2036 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 2037 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2038 } else { 2039 ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 2040 } 2041 2042 if (!rank) { 2043 /* calculate the number of nonzeros on each processor */ 2044 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 2045 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 2046 for (i=0; i<size; i++) { 2047 for (j=rowners[i]; j< rowners[i+1]; j++) { 2048 procsnz[i] += rowlengths[j]; 2049 } 2050 } 2051 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2052 2053 /* determine max buffer needed and allocate it */ 2054 maxnz = 0; 2055 for (i=0; i<size; i++) { 2056 maxnz = PetscMax(maxnz,procsnz[i]); 2057 } 2058 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2059 2060 /* read in my part of the matrix column indices */ 2061 nz = procsnz[0]; 2062 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 2063 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2064 2065 /* read in every one elses and ship off */ 2066 for (i=1; i<size; i++) { 2067 nz = procsnz[i]; 2068 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2069 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2070 } 2071 ierr = PetscFree(cols);CHKERRQ(ierr); 2072 } else { 2073 /* determine buffer space needed for message */ 2074 nz = 0; 2075 for (i=0; i<m; i++) { 2076 nz += ourlens[i]; 2077 } 2078 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 
2079 2080 /* receive message of column indices*/ 2081 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 2082 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 2083 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2084 } 2085 2086 /* determine column ownership if matrix is not square */ 2087 if (N != M) { 2088 n = N/size + ((N % size) > rank); 2089 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2090 cstart = cend - n; 2091 } else { 2092 cstart = rstart; 2093 cend = rend; 2094 n = cend - cstart; 2095 } 2096 2097 /* loop over local rows, determining number of off diagonal entries */ 2098 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2099 jj = 0; 2100 for (i=0; i<m; i++) { 2101 for (j=0; j<ourlens[i]; j++) { 2102 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2103 jj++; 2104 } 2105 } 2106 2107 /* create our matrix */ 2108 for (i=0; i<m; i++) { 2109 ourlens[i] -= offlens[i]; 2110 } 2111 ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr); 2112 ierr = MatSetType(A,type);CHKERRQ(ierr); 2113 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2114 2115 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2116 for (i=0; i<m; i++) { 2117 ourlens[i] += offlens[i]; 2118 } 2119 2120 if (!rank) { 2121 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2122 2123 /* read in my part of the matrix numerical values */ 2124 nz = procsnz[0]; 2125 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2126 2127 /* insert into matrix */ 2128 jj = rstart; 2129 smycols = mycols; 2130 svals = vals; 2131 for (i=0; i<m; i++) { 2132 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2133 smycols += ourlens[i]; 2134 svals += ourlens[i]; 2135 jj++; 2136 } 2137 2138 /* read in other processors and ship out */ 2139 for (i=1; i<size; i++) { 2140 nz = procsnz[i]; 2141 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2142 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2143 } 2144 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2145 } else { 2146 /* receive numeric values */ 2147 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2148 2149 /* receive message of values*/ 2150 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2151 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2152 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2153 2154 /* insert into matrix */ 2155 jj = rstart; 2156 smycols = mycols; 2157 svals = vals; 2158 for (i=0; i<m; i++) { 2159 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2160 smycols += ourlens[i]; 2161 svals += ourlens[i]; 2162 jj++; 2163 } 2164 } 2165 ierr = PetscFree(ourlens);CHKERRQ(ierr); 2166 ierr = PetscFree(vals);CHKERRQ(ierr); 2167 ierr = PetscFree(mycols);CHKERRQ(ierr); 2168 ierr = PetscFree(rowners);CHKERRQ(ierr); 2169 2170 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2171 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2172 *newmat = A; 2173 PetscFunctionReturn(0); 2174 } 2175 2176 #undef __FUNCT__ 2177 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2178 /* 2179 Not great since it makes two copies of the submatrix, first an SeqAIJ 2180 in local and then by concatenating the local matrices the end result. 
2181 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2182 */ 2183 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 2184 { 2185 PetscErrorCode ierr; 2186 PetscMPIInt rank,size; 2187 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j; 2188 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2189 Mat *local,M,Mreuse; 2190 PetscScalar *vwork,*aa; 2191 MPI_Comm comm = mat->comm; 2192 Mat_SeqAIJ *aij; 2193 2194 2195 PetscFunctionBegin; 2196 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2197 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2198 2199 if (call == MAT_REUSE_MATRIX) { 2200 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2201 if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2202 local = &Mreuse; 2203 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2204 } else { 2205 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2206 Mreuse = *local; 2207 ierr = PetscFree(local);CHKERRQ(ierr); 2208 } 2209 2210 /* 2211 m - number of local rows 2212 n - number of columns (same on all processors) 2213 rstart - first row in new global matrix generated 2214 */ 2215 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2216 if (call == MAT_INITIAL_MATRIX) { 2217 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2218 ii = aij->i; 2219 jj = aij->j; 2220 2221 /* 2222 Determine the number of non-zeros in the diagonal and off-diagonal 2223 portions of the matrix in order to do correct preallocation 2224 */ 2225 2226 /* first get start and end of "diagonal" columns */ 2227 if (csize == PETSC_DECIDE) { 2228 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2229 if (mglobal == n) { /* square matrix */ 2230 nlocal = m; 2231 } else { 2232 nlocal = n/size + ((n % size) > rank); 2233 } 2234 } else { 2235 nlocal = csize; 2236 } 2237 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2238 rstart = rend - nlocal; 2239 if (rank == size - 1 && rend != n) { 2240 SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n); 2241 } 2242 2243 /* next, compute all the lengths */ 2244 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 2245 olens = dlens + m; 2246 for (i=0; i<m; i++) { 2247 jend = ii[i+1] - ii[i]; 2248 olen = 0; 2249 dlen = 0; 2250 for (j=0; j<jend; j++) { 2251 if (*jj < rstart || *jj >= rend) olen++; 2252 else dlen++; 2253 jj++; 2254 } 2255 olens[i] = olen; 2256 dlens[i] = dlen; 2257 } 2258 ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr); 2259 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2260 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2261 ierr = PetscFree(dlens);CHKERRQ(ierr); 2262 } else { 2263 PetscInt ml,nl; 2264 2265 M = *newmat; 2266 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2267 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2268 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2269 /* 2270 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2271 rather than the slower MatSetValues(). 
2272 */ 2273 M->was_assembled = PETSC_TRUE; 2274 M->assembled = PETSC_FALSE; 2275 } 2276 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2277 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2278 ii = aij->i; 2279 jj = aij->j; 2280 aa = aij->a; 2281 for (i=0; i<m; i++) { 2282 row = rstart + i; 2283 nz = ii[i+1] - ii[i]; 2284 cwork = jj; jj += nz; 2285 vwork = aa; aa += nz; 2286 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2287 } 2288 2289 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2290 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2291 *newmat = M; 2292 2293 /* save submatrix used in processor for next request */ 2294 if (call == MAT_INITIAL_MATRIX) { 2295 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2296 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2297 } 2298 2299 PetscFunctionReturn(0); 2300 } 2301 2302 #undef __FUNCT__ 2303 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2304 /*@C 2305 MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format 2306 (the default parallel PETSc format). For good matrix assembly performance 2307 the user should preallocate the matrix storage by setting the parameters 2308 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2309 performance can be increased by more than a factor of 50. 2310 2311 Collective on MPI_Comm 2312 2313 Input Parameters: 2314 + A - the matrix 2315 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2316 (same value is used for all local rows) 2317 . d_nnz - array containing the number of nonzeros in the various rows of the 2318 DIAGONAL portion of the local submatrix (possibly different for each row) 2319 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2320 The size of this array is equal to the number of local rows, i.e 'm'. 2321 You must leave room for the diagonal entry even if it is zero. 2322 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2323 submatrix (same value is used for all local rows). 2324 - o_nnz - array containing the number of nonzeros in the various rows of the 2325 OFF-DIAGONAL portion of the local submatrix (possibly different for 2326 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2327 structure. The size of this array is equal to the number 2328 of local rows, i.e 'm'. 2329 2330 The AIJ format (also called the Yale sparse matrix format or 2331 compressed row storage), is fully compatible with standard Fortran 77 2332 storage. That is, the stored row and column indices can begin at 2333 either one (as in Fortran) or zero. See the users manual for details. 2334 2335 The user MUST specify either the local or global matrix dimensions 2336 (possibly both). 2337 2338 The parallel matrix is partitioned such that the first m0 rows belong to 2339 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2340 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2341 2342 The DIAGONAL portion of the local submatrix of a processor can be defined 2343 as the submatrix which is obtained by extraction the part corresponding 2344 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2345 first row that belongs to the processor, and r2 is the last row belonging 2346 to the this processor. This is a square mxm matrix. The remaining portion 2347 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 
2348 2349 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2350 2351 By default, this format uses inodes (identical nodes) when possible. 2352 We search for consecutive rows with the same nonzero structure, thereby 2353 reusing matrix information to achieve increased efficiency. 2354 2355 Options Database Keys: 2356 + -mat_aij_no_inode - Do not use inodes 2357 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2358 - -mat_aij_oneindex - Internally use indexing starting at 1 2359 rather than 0. Note that when calling MatSetValues(), 2360 the user still MUST index entries starting at 0! 2361 2362 Example usage: 2363 2364 Consider the following 8x8 matrix with 34 non-zero values, that is 2365 assembled across 3 processors. Let us assume that proc0 owns 3 rows, 2366 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 2367 as follows: 2368 2369 .vb 2370 1 2 0 | 0 3 0 | 0 4 2371 Proc0 0 5 6 | 7 0 0 | 8 0 2372 9 0 10 | 11 0 0 | 12 0 2373 ------------------------------------- 2374 13 0 14 | 15 16 17 | 0 0 2375 Proc1 0 18 0 | 19 20 21 | 0 0 2376 0 0 0 | 22 23 0 | 24 0 2377 ------------------------------------- 2378 Proc2 25 26 27 | 0 0 28 | 29 0 2379 30 0 0 | 31 32 33 | 0 34 2380 .ve 2381 2382 This can be represented as a collection of submatrices as: 2383 2384 .vb 2385 A B C 2386 D E F 2387 G H I 2388 .ve 2389 2390 where the submatrices A,B,C are owned by proc0, D,E,F are 2391 owned by proc1, and G,H,I are owned by proc2. 2392 2393 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2394 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2395 The 'M','N' parameters are 8,8, and have the same values on all procs. 2396 2397 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2398 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2399 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2400 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 2401 part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ 2402 matrix and [DF] as another SeqAIJ matrix. 2403 2404 When the d_nz, o_nz parameters are specified, d_nz storage elements are 2405 allocated for every row of the local diagonal submatrix, and o_nz 2406 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2407 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 2408 local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices. 2409 In this case, the values of d_nz,o_nz are: 2410 .vb 2411 proc0 : d_nz = 2, o_nz = 2 2412 proc1 : d_nz = 3, o_nz = 2 2413 proc2 : d_nz = 1, o_nz = 4 2414 .ve 2415 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2416 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 2417 for proc2; i.e. we are using 12+15+10=37 storage locations to store 2418 34 values. 2419 2420 When the d_nnz, o_nnz parameters are specified, the storage is specified 2421 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 2422 In the above case the values for d_nnz,o_nnz are: 2423 .vb 2424 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2425 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2426 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2427 .ve 2428 Here the space allocated is the sum of all of the above values, i.e. 34, and 2429 hence the preallocation is exact.
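   A minimal call sequence (a sketch only; here d_nnz and o_nnz stand for whatever
   per-row counts the application has computed) is:
.vb
     Mat A;
     ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
   followed by the usual MatSetValues(), MatAssemblyBegin() and MatAssemblyEnd() calls.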
2430 2431 Level: intermediate 2432 2433 .keywords: matrix, aij, compressed row, sparse, parallel 2434 2435 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2436 @*/ 2437 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2438 { 2439 PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]); 2440 2441 PetscFunctionBegin; 2442 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2443 if (f) { 2444 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2445 } 2446 PetscFunctionReturn(0); 2447 } 2448 2449 #undef __FUNCT__ 2450 #define __FUNCT__ "MatCreateMPIAIJ" 2451 /*@C 2452 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2453 (the default parallel PETSc format). For good matrix assembly performance 2454 the user should preallocate the matrix storage by setting the parameters 2455 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2456 performance can be increased by more than a factor of 50. 2457 2458 Collective on MPI_Comm 2459 2460 Input Parameters: 2461 + comm - MPI communicator 2462 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2463 This value should be the same as the local size used in creating the 2464 y vector for the matrix-vector product y = Ax. 2465 . n - This value should be the same as the local size used in creating the 2466 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2467 calculated if N is given) For square matrices n is almost always m. 2468 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2469 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2470 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2471 (same value is used for all local rows) 2472 . d_nnz - array containing the number of nonzeros in the various rows of the 2473 DIAGONAL portion of the local submatrix (possibly different for each row) 2474 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2475 The size of this array is equal to the number of local rows, i.e 'm'. 2476 You must leave room for the diagonal entry even if it is zero. 2477 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2478 submatrix (same value is used for all local rows). 2479 - o_nnz - array containing the number of nonzeros in the various rows of the 2480 OFF-DIAGONAL portion of the local submatrix (possibly different for 2481 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2482 structure. The size of this array is equal to the number 2483 of local rows, i.e 'm'. 2484 2485 Output Parameter: 2486 . A - the matrix 2487 2488 Notes: 2489 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2490 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2491 storage requirements for this matrix. 2492 2493 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2494 processor than it must be used on all processors that share the object for 2495 that argument. 2496 2497 The AIJ format (also called the Yale sparse matrix format or 2498 compressed row storage), is fully compatible with standard Fortran 77 2499 storage. That is, the stored row and column indices can begin at 2500 either one (as in Fortran) or zero. See the users manual for details. 
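   For instance, a rough sketch of a call that fixes the local sizes, lets PETSc
   compute the global sizes, and uses a constant per-row estimate is:
.vb
     ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,d_nz,PETSC_NULL,o_nz,PETSC_NULL,&A);CHKERRQ(ierr);
.ve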
2501 2502 The user MUST specify either the local or global matrix dimensions 2503 (possibly both). 2504 2505 The parallel matrix is partitioned such that the first m0 rows belong to 2506 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2507 to process 2, etc., where m0,m1,m2... are the input parameter 'm'. 2508 2509 The DIAGONAL portion of the local submatrix of a processor can be defined 2510 as the submatrix which is obtained by extracting the part corresponding 2511 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2512 first row that belongs to the processor, and r2 is the last row belonging 2513 to this processor. This is a square mxm matrix. The remaining portion 2514 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 2515 2516 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2517 2518 When calling this routine with a single process communicator, a matrix of 2519 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2520 type of communicator, use the construction mechanism: 2521 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...); 2522 2523 By default, this format uses inodes (identical nodes) when possible. 2524 We search for consecutive rows with the same nonzero structure, thereby 2525 reusing matrix information to achieve increased efficiency. 2526 2527 Options Database Keys: 2528 + -mat_aij_no_inode - Do not use inodes 2529 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2530 - -mat_aij_oneindex - Internally use indexing starting at 1 2531 rather than 0. Note that when calling MatSetValues(), 2532 the user still MUST index entries starting at 0! 2533 2534 2535 Example usage: 2536 2537 Consider the following 8x8 matrix with 34 non-zero values, that is 2538 assembled across 3 processors. Let us assume that proc0 owns 3 rows, 2539 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 2540 as follows: 2541 2542 .vb 2543 1 2 0 | 0 3 0 | 0 4 2544 Proc0 0 5 6 | 7 0 0 | 8 0 2545 9 0 10 | 11 0 0 | 12 0 2546 ------------------------------------- 2547 13 0 14 | 15 16 17 | 0 0 2548 Proc1 0 18 0 | 19 20 21 | 0 0 2549 0 0 0 | 22 23 0 | 24 0 2550 ------------------------------------- 2551 Proc2 25 26 27 | 0 0 28 | 29 0 2552 30 0 0 | 31 32 33 | 0 34 2553 .ve 2554 2555 This can be represented as a collection of submatrices as: 2556 2557 .vb 2558 A B C 2559 D E F 2560 G H I 2561 .ve 2562 2563 where the submatrices A,B,C are owned by proc0, D,E,F are 2564 owned by proc1, and G,H,I are owned by proc2. 2565 2566 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2567 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2568 The 'M','N' parameters are 8,8, and have the same values on all procs. 2569 2570 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2571 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2572 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2573 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 2574 part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ 2575 matrix and [DF] as another SeqAIJ matrix. 2576 2577 When the d_nz, o_nz parameters are specified, d_nz storage elements are 2578 allocated for every row of the local diagonal submatrix, and o_nz 2579 storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2580 One way to choose d_nz and o_nz is to use the max nonzerors per local 2581 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2582 In this case, the values of d_nz,o_nz are: 2583 .vb 2584 proc0 : dnz = 2, o_nz = 2 2585 proc1 : dnz = 3, o_nz = 2 2586 proc2 : dnz = 1, o_nz = 4 2587 .ve 2588 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2589 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2590 for proc3. i.e we are using 12+15+10=37 storage locations to store 2591 34 values. 2592 2593 When d_nnz, o_nnz parameters are specified, the storage is specified 2594 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2595 In the above case the values for d_nnz,o_nnz are: 2596 .vb 2597 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2598 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2599 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2600 .ve 2601 Here the space allocated is sum of all the above values i.e 34, and 2602 hence pre-allocation is perfect. 2603 2604 Level: intermediate 2605 2606 .keywords: matrix, aij, compressed row, sparse, parallel 2607 2608 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2609 @*/ 2610 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2611 { 2612 PetscErrorCode ierr; 2613 PetscMPIInt size; 2614 2615 PetscFunctionBegin; 2616 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2617 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2618 if (size > 1) { 2619 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2620 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2621 } else { 2622 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2623 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2624 } 2625 PetscFunctionReturn(0); 2626 } 2627 2628 #undef __FUNCT__ 2629 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2630 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2631 { 2632 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2633 2634 PetscFunctionBegin; 2635 *Ad = a->A; 2636 *Ao = a->B; 2637 *colmap = a->garray; 2638 PetscFunctionReturn(0); 2639 } 2640 2641 #undef __FUNCT__ 2642 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2643 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2644 { 2645 PetscErrorCode ierr; 2646 PetscInt i; 2647 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2648 2649 PetscFunctionBegin; 2650 if (coloring->ctype == IS_COLORING_LOCAL) { 2651 ISColoringValue *allcolors,*colors; 2652 ISColoring ocoloring; 2653 2654 /* set coloring for diagonal portion */ 2655 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2656 2657 /* set coloring for off-diagonal portion */ 2658 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2659 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2660 for (i=0; i<a->B->n; i++) { 2661 colors[i] = allcolors[a->garray[i]]; 2662 } 2663 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2664 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2665 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2666 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2667 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2668 ISColoringValue *colors; 2669 PetscInt *larray; 2670 ISColoring ocoloring; 2671 2672 /* set coloring for diagonal portion */ 2673 ierr = 
PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2674 for (i=0; i<a->A->n; i++) { 2675 larray[i] = i + a->cstart; 2676 } 2677 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2678 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2679 for (i=0; i<a->A->n; i++) { 2680 colors[i] = coloring->colors[larray[i]]; 2681 } 2682 ierr = PetscFree(larray);CHKERRQ(ierr); 2683 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2684 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2685 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2686 2687 /* set coloring for off-diagonal portion */ 2688 ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2689 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2690 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2691 for (i=0; i<a->B->n; i++) { 2692 colors[i] = coloring->colors[larray[i]]; 2693 } 2694 ierr = PetscFree(larray);CHKERRQ(ierr); 2695 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2696 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2697 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2698 } else { 2699 SETERRQ1(1,"No support ISColoringType %d",coloring->ctype); 2700 } 2701 2702 PetscFunctionReturn(0); 2703 } 2704 2705 #undef __FUNCT__ 2706 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2707 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2708 { 2709 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2710 PetscErrorCode ierr; 2711 2712 PetscFunctionBegin; 2713 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2714 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2715 PetscFunctionReturn(0); 2716 } 2717 2718 #undef __FUNCT__ 2719 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2720 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 2721 { 2722 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2723 PetscErrorCode ierr; 2724 2725 PetscFunctionBegin; 2726 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2727 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2728 PetscFunctionReturn(0); 2729 } 2730 2731 #undef __FUNCT__ 2732 #define __FUNCT__ "MatMerge" 2733 /*@C 2734 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2735 matrices from each processor 2736 2737 Collective on MPI_Comm 2738 2739 Input Parameters: 2740 + comm - the communicators the parallel matrix will live on 2741 . inmat - the input sequential matrices 2742 . n - number of local columns (or PETSC_DECIDE) 2743 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2744 2745 Output Parameter: 2746 . outmat - the parallel matrix generated 2747 2748 Level: advanced 2749 2750 Notes: The number of columns of the matrix in EACH processor MUST be the same. 
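   A minimal sketch of its use, assuming each process has already assembled its local
   rows in a sequential matrix (Aseq and Ampi below are placeholder names), is:
.vb
     ierr = MatMerge(comm,Aseq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Ampi);CHKERRQ(ierr);
.ve
   The input matrix is destroyed inside MatMerge() once its rows have been copied
   into the parallel matrix.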
2751 2752 @*/ 2753 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2754 { 2755 PetscErrorCode ierr; 2756 PetscInt m,N,i,rstart,nnz,I,*dnz,*onz; 2757 const PetscInt *indx; 2758 const PetscScalar *values; 2759 PetscMap columnmap,rowmap; 2760 2761 PetscFunctionBegin; 2762 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2763 /* 2764 PetscMPIInt rank; 2765 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2766 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N); 2767 */ 2768 if (scall == MAT_INITIAL_MATRIX){ 2769 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2770 if (n == PETSC_DECIDE){ 2771 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2772 ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr); 2773 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2774 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2775 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2776 } 2777 2778 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2779 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2780 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2781 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2782 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2783 2784 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2785 for (i=0;i<m;i++) { 2786 ierr = MatGetRow(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2787 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2788 ierr = MatRestoreRow(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2789 } 2790 /* This routine will ONLY return MPIAIJ type matrix */ 2791 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr); 2792 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2793 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2794 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2795 2796 } else if (scall == MAT_REUSE_MATRIX){ 2797 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2798 } else { 2799 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall); 2800 } 2801 2802 for (i=0;i<m;i++) { 2803 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2804 I = i + rstart; 2805 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2806 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2807 } 2808 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2809 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2810 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2811 2812 PetscFunctionReturn(0); 2813 } 2814 2815 #undef __FUNCT__ 2816 #define __FUNCT__ "MatFileSplit" 2817 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2818 { 2819 PetscErrorCode ierr; 2820 PetscMPIInt rank; 2821 PetscInt m,N,i,rstart,nnz; 2822 size_t len; 2823 const PetscInt *indx; 2824 PetscViewer out; 2825 char *name; 2826 Mat B; 2827 const PetscScalar *values; 2828 2829 PetscFunctionBegin; 2830 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2831 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2832 /* Should this be the type of the diagonal block of A? 
*/ 2833 ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr); 2834 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2835 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2836 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2837 for (i=0;i<m;i++) { 2838 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2839 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2840 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2841 } 2842 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2843 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2844 2845 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2846 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2847 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2848 sprintf(name,"%s.%d",outfile,rank); 2849 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr); 2850 ierr = PetscFree(name); 2851 ierr = MatView(B,out);CHKERRQ(ierr); 2852 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2853 ierr = MatDestroy(B);CHKERRQ(ierr); 2854 PetscFunctionReturn(0); 2855 } 2856 2857 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 2858 #undef __FUNCT__ 2859 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 2860 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 2861 { 2862 PetscErrorCode ierr; 2863 Mat_Merge_SeqsToMPI *merge; 2864 PetscObjectContainer container; 2865 2866 PetscFunctionBegin; 2867 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2868 if (container) { 2869 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2870 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 2871 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 2872 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 2873 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 2874 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 2875 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 2876 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 2877 ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr); 2878 ierr = MatDestroy(merge->C_seq);CHKERRQ(ierr); 2879 2880 ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr); 2881 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 2882 } 2883 ierr = PetscFree(merge);CHKERRQ(ierr); 2884 2885 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 2886 PetscFunctionReturn(0); 2887 } 2888 2889 #include "src/mat/utils/freespace.h" 2890 #include "petscbt.h" 2891 #undef __FUNCT__ 2892 #define __FUNCT__ "MatMerge_SeqsToMPI" 2893 /*@C 2894 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 2895 matrices from each processor 2896 2897 Collective on MPI_Comm 2898 2899 Input Parameters: 2900 + comm - the communicators the parallel matrix will live on 2901 . seqmat - the input sequential matrices 2902 . m - number of local rows (or PETSC_DECIDE) 2903 . n - number of local columns (or PETSC_DECIDE) 2904 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2905 2906 Output Parameter: 2907 . mpimat - the parallel matrix generated 2908 2909 Level: advanced 2910 2911 Notes: 2912 The dimensions of the sequential matrix in each processor MUST be the same. 2913 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 2914 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 
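   The operation is performed in two phases, a symbolic phase that builds the parallel
   nonzero structure and a numeric phase that accumulates the values; a sketch of their
   use is:
.vb
     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&mpimat);CHKERRQ(ierr);
     ierr = MatMerge_SeqsToMPINumeric(seqmat,mpimat);CHKERRQ(ierr);
.ve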
2915 @*/ 2916 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 2917 { 2918 PetscErrorCode ierr; 2919 MPI_Comm comm=mpimat->comm; 2920 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2921 PetscMPIInt size,rank,taga,*len_s; 2922 PetscInt N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j; 2923 PetscInt proc,m; 2924 PetscInt **buf_ri,**buf_rj; 2925 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 2926 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 2927 MPI_Request *s_waits,*r_waits; 2928 MPI_Status *status; 2929 MatScalar *aa=a->a,**abuf_r,*ba_i; 2930 Mat_Merge_SeqsToMPI *merge; 2931 PetscObjectContainer container; 2932 2933 PetscFunctionBegin; 2934 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2935 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2936 2937 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2938 if (container) { 2939 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2940 } 2941 bi = merge->bi; 2942 bj = merge->bj; 2943 buf_ri = merge->buf_ri; 2944 buf_rj = merge->buf_rj; 2945 2946 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 2947 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 2948 len_s = merge->len_s; 2949 2950 /* send and recv matrix values */ 2951 /*-----------------------------*/ 2952 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 2953 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 2954 2955 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 2956 for (proc=0,k=0; proc<size; proc++){ 2957 if (!len_s[proc]) continue; 2958 i = owners[proc]; 2959 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 2960 k++; 2961 } 2962 2963 ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr); 2964 ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr); 2965 ierr = PetscFree(status);CHKERRQ(ierr); 2966 2967 ierr = PetscFree(s_waits);CHKERRQ(ierr); 2968 ierr = PetscFree(r_waits);CHKERRQ(ierr); 2969 2970 /* insert mat values of mpimat */ 2971 /*----------------------------*/ 2972 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 2973 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 2974 nextrow = buf_ri_k + merge->nrecv; 2975 nextai = nextrow + merge->nrecv; 2976 2977 for (k=0; k<merge->nrecv; k++){ 2978 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 2979 nrows = *(buf_ri_k[k]); 2980 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 2981 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 2982 } 2983 2984 /* set values of ba */ 2985 ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); 2986 for (i=0; i<m; i++) { 2987 arow = owners[rank] + i; 2988 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 2989 bnzi = bi[i+1] - bi[i]; 2990 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 2991 2992 /* add local non-zero vals of this proc's seqmat into ba */ 2993 anzi = ai[arow+1] - ai[arow]; 2994 aj = a->j + ai[arow]; 2995 aa = a->a + ai[arow]; 2996 nextaj = 0; 2997 for (j=0; nextaj<anzi; j++){ 2998 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 2999 ba_i[j] += aa[nextaj++]; 3000 } 3001 } 3002 3003 /* add received vals into ba */ 3004 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 3005 /* i-th row */ 3006 if (i == 
*nextrow[k]) { 3007 anzi = *(nextai[k]+1) - *nextai[k]; 3008 aj = buf_rj[k] + *(nextai[k]); 3009 aa = abuf_r[k] + *(nextai[k]); 3010 nextaj = 0; 3011 for (j=0; nextaj<anzi; j++){ 3012 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 3013 ba_i[j] += aa[nextaj++]; 3014 } 3015 } 3016 nextrow[k]++; nextai[k]++; 3017 } 3018 } 3019 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3020 } 3021 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3022 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3023 3024 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3025 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3026 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3027 3028 PetscFunctionReturn(0); 3029 } 3030 3031 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3032 { 3033 PetscErrorCode ierr; 3034 Mat B_mpi; 3035 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3036 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3037 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3038 PetscInt M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j; 3039 PetscInt len,proc,*dnz,*onz; 3040 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3041 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3042 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3043 MPI_Status *status; 3044 MatScalar *ba; 3045 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3046 PetscBT lnkbt; 3047 Mat_Merge_SeqsToMPI *merge; 3048 PetscObjectContainer container; 3049 3050 PetscFunctionBegin; 3051 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3052 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3053 3054 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3055 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3056 3057 /* determine the number of messages to send, their lengths */ 3058 /*---------------------------------------------------------*/ 3059 ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr); 3060 if (m == PETSC_DECIDE) { 3061 ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr); 3062 } else { 3063 ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 3064 } 3065 ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr); 3066 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3067 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3068 3069 if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); } 3070 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 3071 len_s = merge->len_s; 3072 3073 len = 0; /* length of buf_si[] */ 3074 merge->nsend = 0; 3075 for (proc=0; proc<size; proc++){ 3076 len_si[proc] = 0; 3077 if (proc == rank){ 3078 len_si[proc] = len_s[proc] = 0; 3079 } else { 3080 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3081 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 3082 } 3083 if (len_s[proc]) { 3084 merge->nsend++; 3085 nrows = 0; 3086 for (i=owners[proc]; i<owners[proc+1]; i++){ 3087 if (ai[i+1] > ai[i]) nrows++; 3088 } 3089 len_si[proc] = 2*(nrows+1); 3090 len += len_si[proc]; 3091 } 3092 } 3093 3094 /* determine the number and length of messages to receive for ij-structure */ 3095 /*-------------------------------------------------------------------------*/ 3096 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 3097 ierr = 
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*--------------------------------*/
  ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
  sj_waits = si_waits + merge->nsend;

  for (proc=0, k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr   = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (local to the receiving process)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++){
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i - owners[proc];       /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);

  ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++){
    ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree(si_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr  = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
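
  /*
     Layout of each received i-structure buffer buf_ri[k] (built as buf_si on
     the sending side, see above): entry 0 holds nrows, entries [1:nrows] hold
     the local row indices of the nonempty rows sent, and entries
     [nrows+1:2*nrows+1] hold the running offsets into the matching j/value
     buffers.  For example (hypothetical data), two nonempty rows 3 and 8 with
     2 and 5 nonzeros respectively are encoded as {2, 3, 8, 0, 2, 7}.
  */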

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow = buf_ri_k + merge->nrecv;
  nextai  = nextrow + merge->nrecv;
  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k];                 /* beginning of k-th received i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow  = owners[rank] + i;
    anzi  = ai[arow+1] - ai[arow];
    aj    = a->j + ai[arow];
    ierr  = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi  = *(nextai[k]+1) - *nextai[k];
        aj    = buf_rj[k] + *nextai[k];
        ierr  = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining < bnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  ierr = PetscMalloc((len+1)*sizeof(MatScalar),&ba);CHKERRQ(ierr);
  ierr = PetscMemzero(ba,(len+1)*sizeof(MatScalar));CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    k    = i + owners[rank];
    bnzi = bi[i+1] - bi[i];
    ierr = MatSetValues(B_mpi,1,&k,bnzi,bj+bi[i],ba,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B_mpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B_mpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(ba);CHKERRQ(ierr);
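
  /*
     B_mpi has been assembled above with explicitly stored zeros so that its
     nonzero pattern and communication layout are fixed at this point; the
     numeric phase (MatMerge_SeqsToMPINumeric) only overwrites values on this
     same pattern with INSERT_VALUES.
  */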

  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi     = bi;
  merge->bj     = bj;
  merge->buf_ri = buf_ri;
  merge->buf_rj = buf_rj;
  merge->C_seq  = seqmat;

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  PetscFunctionReturn(0);
}

static PetscEvent logkey_seqstompi = 0;
#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPI"
PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!logkey_seqstompi) {
    ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscEvent logkey_getlocalmat = 0;
#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMat"
/*@C
   MatGetLocalMat - Creates a SeqAIJ matrix that contains all the local rows of A
   and only its nonzero columns

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  row, col - index sets of rows and columns to extract (or PETSC_NULL)

   Output Parameter:
.  A_loc - the local sequential matrix generated

   Level: developer

@*/
PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;

  PetscFunctionBegin;
  if (!logkey_getlocalmat) {
    ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (!row){
    start = a->rstart; end = a->rend;
    ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col){
    start = a->cstart;
    cmap  = a->garray;
    nzA   = a->A->n;
    nzB   = a->B->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt),&idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++)     idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX){
    ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr   = PetscFree(aloc);CHKERRQ(ierr);
  if (!row){
    ierr = ISDestroy(isrowa);CHKERRQ(ierr);
  }
  if (!col){
    ierr = ISDestroy(iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
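
/*
   Typical call sequence for MatGetLocalMat (illustration only; the matrix
   names are hypothetical).  With PETSC_NULL for row and col the routine
   chooses the local rows and the relevant columns itself:

     Mat A_loc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
     ...                   A changes numerically but keeps its nonzero pattern
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/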

static PetscEvent logkey_GetBrowsOfAcols = 0;
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
   MatGetBrowsOfAcols - Creates a SeqAIJ matrix containing the rows of B that
   correspond to the nonzero columns of A

   Collective on Mat

   Input Parameters:
+  A,B - the matrices in MPIAIJ format
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)

   Output Parameters:
+  rowb, colb - index sets of rows and columns of B used to build B_seq
.  brstart - row index of B_seq at which the B->m rows taken from B's local rows begin
-  B_seq - the sequential matrix generated

   Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq;

  PetscFunctionBegin;
  if (a->cstart != b->rstart || a->cend != b->rend){
    SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend);
  }
  if (!logkey_GetBrowsOfAcols) {
    ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    start = a->cstart;
    cmap  = a->garray;
    nzA   = a->A->n;
    nzB   = a->B->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt),&idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* off-diagonal columns of A below the local column range */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++)     idx[ncols++] = start + i; /* local rows of B */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];   /* off-diagonal columns of A above the local column range */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
    *brstart = imark;
    ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb  = *rowb; iscolb = *colb;
    ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr   = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb){
    ierr = ISDestroy(isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb){
    ierr = ISDestroy(iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
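
/*
   Illustration of how MatGetBrowsOfAcols might be driven (matrix and index-set
   names are hypothetical).  Passing addresses for rowb and colb keeps the
   index sets so a later call can reuse the extracted pattern:

     Mat      B_seq;
     IS       rowb = PETSC_NULL,colb = PETSC_NULL;
     PetscInt brstart;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ...                       B changes numerically but keeps its nonzero pattern
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(rowb);CHKERRQ(ierr);
     ierr = ISDestroy(colb);CHKERRQ(ierr);
     ierr = MatDestroy(B_seq);CHKERRQ(ierr);
*/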

static PetscEvent logkey_GetBrowsOfAocols = 0;
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols"
/*@C
   MatGetBrowsOfAoCols - Creates a SeqAIJ matrix containing the rows of B that
   correspond to the nonzero columns of the off-diagonal portion of A

   Collective on Mat

   Input Parameters:
+  A,B - the matrices in MPIAIJ format
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)

   Output Parameters:
+  rowb, colb - index sets of rows and columns of B used to build B_seq
.  brstart - row index of B_seq at which the rows corresponding to columns beyond A's local column range begin
-  B_seq - the sequential matrix generated

   Level: developer

@*/
PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq;

  PetscFunctionBegin;
  if (a->cstart != b->rstart || a->cend != b->rend){
    SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend);
  }
  if (!logkey_GetBrowsOfAocols) {
    ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrowsOfAoCols",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    start = a->cstart;
    cmap  = a->garray;
    nzB   = a->B->n;
    if (!nzB){ /* A has no off-diagonal columns; output a null matrix */
      *B_seq = PETSC_NULL;
      ierr   = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr  = PetscMalloc(nzB*sizeof(PetscInt),&idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* off-diagonal columns of A below the local column range */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    /* unlike MatGetBrowsOfAcols(), the local rows of B are not included here:
       for (i=0; i<nzA; i++) idx[ncols++] = start + i; */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* off-diagonal columns of A above the local column range */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
    *brstart = imark;
    ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb  = *rowb; iscolb = *colb;
    ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr   = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb){
    ierr = ISDestroy(isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb){
    ierr = ISDestroy(iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
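
/*
   Note on MatGetBrowsOfAoCols (illustration only; the names are hypothetical):
   when A has no off-diagonal columns on this process the routine above returns
   without building anything and sets *B_seq to PETSC_NULL, so callers may want
   to guard their use of the result:

     Mat      B_oth;
     IS       rowb = PETSC_NULL,colb = PETSC_NULL;
     PetscInt brstart;
     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_oth);CHKERRQ(ierr);
     if (B_oth) {
       ...                     work with the off-diagonal rows of B
       ierr = MatDestroy(B_oth);CHKERRQ(ierr);
     }
*/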