#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/inline/spops.h"

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. When PETSC_USE_CTABLE is defined this is scalable
   at a slightly higher hash-table cost; without it, it is not scalable
   (each processor has an order-N integer array, but access is fast).
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
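/*
   A minimal sketch (an editorial addition, not part of the PETSc API) of how
   the map built above is consumed later in this file: entries are stored
   shifted by one so that a lookup result of zero can signal "global column
   gcol has no local counterpart in B".

      PetscInt lcol;
   #if defined (PETSC_USE_CTABLE)
      ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
      lcol--;                          now -1 means: not a column of B
   #else
      lcol = aij->colmap[gcol] - 1;    now -1 means: not a column of B
   #endif
*/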
#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      PetscInt    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      len     = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(am+1)*sizeof(PetscInt); \
      ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j   = (PetscInt*)(new_a + new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                         len*sizeof(PetscInt));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      PetscInt    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      len     = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(bm+1)*sizeof(PetscInt); \
      ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j   = (PetscInt*)(new_a + new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
      for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                         len*sizeof(PetscInt));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(b->a);CHKERRQ(ierr); \
      if (!b->singlemalloc) { \
        ierr = PetscFree(b->i);CHKERRQ(ierr); \
        ierr = PetscFree(b->j);CHKERRQ(ierr); \
      } \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = PETSC_TRUE; \
 \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow; \
}
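/*
   Editorial note: the two macros above implement the same insertion scheme
   for a single (row,col,value) triple of a SeqAIJ block. A short bisection
   narrows the row to a window of at most five entries, a linear scan finds
   the exact column, and if the entry is new and the row is already full the
   entire CSR storage is reallocated with CHUNKSIZE extra slots spliced in
   after the row; the offsets of all later rows are shifted up by CHUNKSIZE
   and a->reallocs (or b->reallocs) is incremented, which MatAssemblyBegin
   later reports as "mallocs".
*/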
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
  PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat            A = aij->A;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar    *aa = a->a;
  PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat            B = aij->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
  PetscScalar    *ba = b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
  PetscInt       nonew = a->nonew,shift = 0;
  PetscScalar    *ap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
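/*
   A minimal usage sketch (editorial; the indices are hypothetical): locally
   owned rows are routed straight into the diagonal (A) or off-diagonal (B)
   block above, while rows owned by other processes are stashed and forwarded
   during assembly.

      PetscInt    rows[1] = {10},cols[2] = {10,42};
      PetscScalar vals[2] = {4.0,-1.0};
      ierr = MatSetValues(mat,1,rows,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/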
%D",idxm[i],mat->M-1); 284 if (idxm[i] >= rstart && idxm[i] < rend) { 285 row = idxm[i] - rstart; 286 for (j=0; j<n; j++) { 287 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); 288 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1); 289 if (idxn[j] >= cstart && idxn[j] < cend){ 290 col = idxn[j] - cstart; 291 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 292 } else { 293 if (!aij->colmap) { 294 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 295 } 296 #if defined (PETSC_USE_CTABLE) 297 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 298 col --; 299 #else 300 col = aij->colmap[idxn[j]] - 1; 301 #endif 302 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 303 else { 304 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 305 } 306 } 307 } 308 } else { 309 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 310 } 311 } 312 PetscFunctionReturn(0); 313 } 314 315 #undef __FUNCT__ 316 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 317 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 318 { 319 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 320 PetscErrorCode ierr; 321 PetscInt nstash,reallocs; 322 InsertMode addv; 323 324 PetscFunctionBegin; 325 if (aij->donotstash) { 326 PetscFunctionReturn(0); 327 } 328 329 /* make sure all processors are either in INSERTMODE or ADDMODE */ 330 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 331 if (addv == (ADD_VALUES|INSERT_VALUES)) { 332 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 333 } 334 mat->insertmode = addv; /* in case this processor had no cache */ 335 336 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 337 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 338 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs); 339 PetscFunctionReturn(0); 340 } 341 342 343 #undef __FUNCT__ 344 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 345 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 346 { 347 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 348 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 349 PetscErrorCode ierr; 350 PetscMPIInt n; 351 PetscInt i,j,rstart,ncols,flg; 352 PetscInt *row,*col,other_disassembled; 353 PetscScalar *val; 354 InsertMode addv = mat->insertmode; 355 356 PetscFunctionBegin; 357 if (!aij->donotstash) { 358 while (1) { 359 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 360 if (!flg) break; 361 362 for (i=0; i<n;) { 363 /* Now identify the consecutive vals belonging to the same row */ 364 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 365 if (j < n) ncols = j-i; 366 else ncols = n-i; 367 /* Now assemble all these values with a single function call */ 368 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 369 i = j; 370 } 371 } 372 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 373 } 374 375 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 376 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 377 378 /* determine if any processor has disassembled, if so we must 379 also disassemble ourselfs, in order that we may reassemble. 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)aij->A->data,*b = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col,other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
      /* reaccess the b because aij->B was changed */
      b = (Mat_SeqAIJ*)aij->B->data;
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }

  /* used by MatAXPY() */
  a->xtoy = 0; b->xtoy = 0;
  a->XtoY = 0; b->XtoY = 0;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag;
  PetscInt       i,N,*rows,*owners = l->rowners;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart = l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory with many mallocs and slow things down.

     Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
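/*
   A minimal usage sketch (editorial; the row numbers are hypothetical): each
   process may list any global rows, owned or not, since the code above routes
   every row index to its owning process before the local blocks are zeroed.

      IS          is;
      PetscInt    rows[2] = {0,7};
      PetscScalar one = 1.0;
      ierr = ISCreateGeneral(PETSC_COMM_SELF,2,rows,&is);CHKERRQ(ierr);
      ierr = MatZeroRows(A,is,&one);CHKERRQ(ierr);   zero the rows, put 1.0 on the diagonal
      ierr = ISDestroy(is);CHKERRQ(ierr);
*/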
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in yy until the next line, which is true for my implementation
     but perhaps is not always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
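/*
   Editorial note: the routines above compute the same split-form product.
   With A_d the diagonal block, A_o the off-diagonal block, x_loc the owned
   part of x, and x_ghost the scattered ghost values (a->lvec),

        y = A_d x_loc + A_o x_ghost

   where the scatter filling x_ghost is overlapped with the A_d multiply.
   The transpose products reverse the scatter: contributions computed locally
   from A_o^T are added back into their owners' entries of the result with
   SCATTER_REVERSE and ADD_VALUES.
*/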
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)Amat->data,*Bij;
  Mat            Adia = Aij->A,Bdia,Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*)Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in zz until the next line, which is true for my implementation
     but perhaps is not always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->M;
    header[2] = mat->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen = mat->m;
    ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* rank 0 needs as much workspace as the largest number of local nonzeros on any process */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);
  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
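/*
   Editorial note: the binary file produced above has the layout (integers
   written as PETSC_INT, values as PETSC_SCALAR)

        header[4]                 = {MAT_FILE_COOKIE, M, N, global nz}
        row_lengths[M]              one entry per global row
        column_indices[global nz]
        column_values[global nz]

   Rank 0 writes its own data first and then interleaves data received from
   ranks 1..size-1 in rank order, so rows appear in global row order.
*/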
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,flg,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
      if (flg) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                     rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                     rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    PetscInt    M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    if (!rank) {
      ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1;
  PetscScalar    mone = -1.0;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
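/*
   Editorial note: each "local" sweep above relaxes with the diagonal block
   only; coupling to other processes enters through the right-hand-side
   update. With x_ghost the scattered ghost values in mat->lvec, the code
   computes

        bb1 = bb - B*x_ghost

   (implemented as bb1 = B*(-x_ghost) + bb via VecScale and multadd) and then
   runs lits iterations of sequential SOR on the diagonal block with rhs bb1.
*/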
#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  info->columns_local     = (double)matin->N;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
       allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}
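/*
   A minimal usage sketch (editorial): the row must be locally owned, and each
   MatGetRow() must be matched by MatRestoreRow() before another row is
   requested; the getrowactive flag above enforces one active row at a time.

      PetscInt    ncols,*cols;
      PetscScalar *vals;
      ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
      ...examine ncols, cols[], vals[]...
      ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
*/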
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = aij->cstart;
  PetscReal      sum = 0.0;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr  = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr  = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->m; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
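/*
   Editorial note: the three supported norms, each accumulated from the two
   local blocks and then reduced across the communicator:

        Frobenius:  ||A||_F   = sqrt(sum_ij |a_ij|^2)   (MPI_SUM of local sums)
        NORM_1:     ||A||_1   = max_j sum_i |a_ij|      (MPI_SUM of column sums, then max)
        NORM_INF:   ||A||_inf = max_i sum_j |a_ij|      (MPI_MAX of local row maxima)

   The two-norm is not supported: it would require a singular-value
   computation rather than a single pass over the stored entries.
*/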
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat            B;
  PetscScalar    *array;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  }

  ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*)a->A->data;
  m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for (i=0; i<ai[m]; i++) {aj[i] += a->cstart;}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*)a->B->data;
  m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
  ct   = cols;
  for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  ierr = PetscFree(ct);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatPrintHelp_MPIAIJ"
PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetBlockSize_MPIAIJ"
PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
  ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
implementation, they aren't compatible for fast copy. */ 1541 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1542 /* because of the column compression in the off-processor part of the matrix a->B, 1543 the number of columns in a->B and b->B may be different, hence we cannot call 1544 the MatCopy() directly on the two parts. If need be, we can provide a more 1545 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1546 then copying the submatrices */ 1547 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1548 } else { 1549 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1550 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1551 } 1552 PetscFunctionReturn(0); 1553 } 1554 1555 #undef __FUNCT__ 1556 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1557 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1558 { 1559 PetscErrorCode ierr; 1560 1561 PetscFunctionBegin; 1562 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1563 PetscFunctionReturn(0); 1564 } 1565 1566 #include "petscblaslapack.h" 1567 #undef __FUNCT__ 1568 #define __FUNCT__ "MatAXPY_MPIAIJ" 1569 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str) 1570 { 1571 PetscErrorCode ierr; 1572 PetscInt i; 1573 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1574 PetscBLASInt bnz,one=1; 1575 Mat_SeqAIJ *x,*y; 1576 1577 PetscFunctionBegin; 1578 if (str == SAME_NONZERO_PATTERN) { 1579 x = (Mat_SeqAIJ *)xx->A->data; 1580 y = (Mat_SeqAIJ *)yy->A->data; 1581 bnz = (PetscBLASInt)x->nz; 1582 BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1583 x = (Mat_SeqAIJ *)xx->B->data; 1584 y = (Mat_SeqAIJ *)yy->B->data; 1585 bnz = (PetscBLASInt)x->nz; 1586 BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1587 } else if (str == SUBSET_NONZERO_PATTERN) { 1588 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1589 1590 x = (Mat_SeqAIJ *)xx->B->data; 1591 y = (Mat_SeqAIJ *)yy->B->data; 1592 if (y->xtoy && y->XtoY != xx->B) { 1593 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1594 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1595 } 1596 if (!y->xtoy) { /* get xtoy */ 1597 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1598 y->XtoY = xx->B; 1599 } 1600 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1601 } else { 1602 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1603 } 1604 PetscFunctionReturn(0); 1605 } 1606 1607 /* -------------------------------------------------------------------*/ 1608 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1609 MatGetRow_MPIAIJ, 1610 MatRestoreRow_MPIAIJ, 1611 MatMult_MPIAIJ, 1612 /* 4*/ MatMultAdd_MPIAIJ, 1613 MatMultTranspose_MPIAIJ, 1614 MatMultTransposeAdd_MPIAIJ, 1615 0, 1616 0, 1617 0, 1618 /*10*/ 0, 1619 0, 1620 0, 1621 MatRelax_MPIAIJ, 1622 MatTranspose_MPIAIJ, 1623 /*15*/ MatGetInfo_MPIAIJ, 1624 MatEqual_MPIAIJ, 1625 MatGetDiagonal_MPIAIJ, 1626 MatDiagonalScale_MPIAIJ, 1627 MatNorm_MPIAIJ, 1628 /*20*/ MatAssemblyBegin_MPIAIJ, 1629 MatAssemblyEnd_MPIAIJ, 1630 0, 1631 MatSetOption_MPIAIJ, 1632 MatZeroEntries_MPIAIJ, 1633 /*25*/ MatZeroRows_MPIAIJ, 1634 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_64BIT_INT) 1635 MatLUFactorSymbolic_MPIAIJ_TFS, 1636 #else 1637 0, 1638 #endif 1639 0, 1640 0, 1641 0, 1642 /*30*/ MatSetUpPreallocation_MPIAIJ, 1643 0, 1644 0, 1645 0, 1646 0, 1647 /*35*/ MatDuplicate_MPIAIJ, 1648 0, 1649 0, 1650 0, 1651 0, 1652 /*40*/ MatAXPY_MPIAIJ, 1653 
MatGetSubMatrices_MPIAIJ, 1654 MatIncreaseOverlap_MPIAIJ, 1655 MatGetValues_MPIAIJ, 1656 MatCopy_MPIAIJ, 1657 /*45*/ MatPrintHelp_MPIAIJ, 1658 MatScale_MPIAIJ, 1659 0, 1660 0, 1661 0, 1662 /*50*/ MatSetBlockSize_MPIAIJ, 1663 0, 1664 0, 1665 0, 1666 0, 1667 /*55*/ MatFDColoringCreate_MPIAIJ, 1668 0, 1669 MatSetUnfactored_MPIAIJ, 1670 0, 1671 0, 1672 /*60*/ MatGetSubMatrix_MPIAIJ, 1673 MatDestroy_MPIAIJ, 1674 MatView_MPIAIJ, 1675 MatGetPetscMaps_Petsc, 1676 0, 1677 /*65*/ 0, 1678 0, 1679 0, 1680 0, 1681 0, 1682 /*70*/ 0, 1683 0, 1684 MatSetColoring_MPIAIJ, 1685 MatSetValuesAdic_MPIAIJ, 1686 MatSetValuesAdifor_MPIAIJ, 1687 /*75*/ 0, 1688 0, 1689 0, 1690 0, 1691 0, 1692 /*80*/ 0, 1693 0, 1694 0, 1695 0, 1696 /*84*/ MatLoad_MPIAIJ, 1697 0, 1698 0, 1699 0, 1700 0, 1701 0, 1702 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1703 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1704 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1705 MatPtAP_MPIAIJ_MPIAIJ, 1706 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1707 /*95*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 1708 0, 1709 0, 1710 0}; 1711 1712 /* ----------------------------------------------------------------------------------------*/ 1713 1714 EXTERN_C_BEGIN 1715 #undef __FUNCT__ 1716 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1717 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 1718 { 1719 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1720 PetscErrorCode ierr; 1721 1722 PetscFunctionBegin; 1723 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1724 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1725 PetscFunctionReturn(0); 1726 } 1727 EXTERN_C_END 1728 1729 EXTERN_C_BEGIN 1730 #undef __FUNCT__ 1731 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1732 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 1733 { 1734 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1735 PetscErrorCode ierr; 1736 1737 PetscFunctionBegin; 1738 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1739 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1740 PetscFunctionReturn(0); 1741 } 1742 EXTERN_C_END 1743 1744 #include "petscpc.h" 1745 EXTERN_C_BEGIN 1746 #undef __FUNCT__ 1747 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1748 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 1749 { 1750 Mat_MPIAIJ *b; 1751 PetscErrorCode ierr; 1752 PetscInt i; 1753 1754 PetscFunctionBegin; 1755 B->preallocated = PETSC_TRUE; 1756 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1757 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1758 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 1759 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 1760 if (d_nnz) { 1761 for (i=0; i<B->m; i++) { 1762 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 1763 } 1764 } 1765 if (o_nnz) { 1766 for (i=0; i<B->m; i++) { 1767 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 1768 } 1769 } 1770 b = (Mat_MPIAIJ*)B->data; 1771 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1772 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1773 1774 PetscFunctionReturn(0); 1775 } 1776 EXTERN_C_END 1777 1778 /*MC 1779 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 1780 1781 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 1782 and MATMPIAIJ otherwise. 
As a result, for single process communicators, 1783 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 1784 for communicators controlling multiple processes. It is recommended that you call both of 1785 the above preallocation routines for simplicity. 1786 1787 Options Database Keys: 1788 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 1789 1790 Level: beginner 1791 1792 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ 1793 M*/ 1794 1795 EXTERN_C_BEGIN 1796 #undef __FUNCT__ 1797 #define __FUNCT__ "MatCreate_AIJ" 1798 PetscErrorCode MatCreate_AIJ(Mat A) 1799 { 1800 PetscErrorCode ierr; 1801 PetscMPIInt size; 1802 1803 PetscFunctionBegin; 1804 ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr); 1805 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1806 if (size == 1) { 1807 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 1808 } else { 1809 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1810 } 1811 PetscFunctionReturn(0); 1812 } 1813 EXTERN_C_END 1814 1815 #undef __FUNCT__ 1816 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1817 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1818 { 1819 Mat mat; 1820 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1821 PetscErrorCode ierr; 1822 1823 PetscFunctionBegin; 1824 *newmat = 0; 1825 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1826 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1827 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1828 a = (Mat_MPIAIJ*)mat->data; 1829 1830 mat->factor = matin->factor; 1831 mat->bs = matin->bs; 1832 mat->assembled = PETSC_TRUE; 1833 mat->insertmode = NOT_SET_VALUES; 1834 mat->preallocated = PETSC_TRUE; 1835 1836 a->rstart = oldmat->rstart; 1837 a->rend = oldmat->rend; 1838 a->cstart = oldmat->cstart; 1839 a->cend = oldmat->cend; 1840 a->size = oldmat->size; 1841 a->rank = oldmat->rank; 1842 a->donotstash = oldmat->donotstash; 1843 a->roworiented = oldmat->roworiented; 1844 a->rowindices = 0; 1845 a->rowvalues = 0; 1846 a->getrowactive = PETSC_FALSE; 1847 1848 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr); 1849 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1850 if (oldmat->colmap) { 1851 #if defined (PETSC_USE_CTABLE) 1852 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1853 #else 1854 ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 1855 PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt)); 1856 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr); 1857 #endif 1858 } else a->colmap = 0; 1859 if (oldmat->garray) { 1860 PetscInt len; 1861 len = oldmat->B->n; 1862 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 1863 PetscLogObjectMemory(mat,len*sizeof(PetscInt)); 1864 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 1865 } else a->garray = 0; 1866 1867 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1868 PetscLogObjectParent(mat,a->lvec); 1869 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1870 PetscLogObjectParent(mat,a->Mvctx); 1871 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1872 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1873 PetscLogObjectParent(mat,a->A); 1874 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1875 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 
1876 PetscLogObjectParent(mat,a->B); 1877 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1878 *newmat = mat; 1879 PetscFunctionReturn(0); 1880 } 1881 1882 #include "petscsys.h" 1883 1884 #undef __FUNCT__ 1885 #define __FUNCT__ "MatLoad_MPIAIJ" 1886 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 1887 { 1888 Mat A; 1889 PetscScalar *vals,*svals; 1890 MPI_Comm comm = ((PetscObject)viewer)->comm; 1891 MPI_Status status; 1892 PetscErrorCode ierr; 1893 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*rowners,maxnz,mm; 1894 PetscInt i,nz,j,rstart,rend; 1895 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 1896 PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1897 PetscInt cend,cstart,n; 1898 int fd; 1899 1900 PetscFunctionBegin; 1901 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1902 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1903 if (!rank) { 1904 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1905 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1906 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1907 if (header[3] < 0) { 1908 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 1909 } 1910 } 1911 1912 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 1913 M = header[1]; N = header[2]; 1914 /* determine ownership of all rows */ 1915 m = M/size + ((M % size) > rank); 1916 mm = (PetscMPIInt) m; 1917 ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 1918 ierr = MPI_Allgather(&mm,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1919 rowners[0] = 0; 1920 for (i=2; i<=size; i++) { 1921 rowners[i] += rowners[i-1]; 1922 } 1923 rstart = rowners[rank]; 1924 rend = rowners[rank+1]; 1925 1926 /* distribute row lengths to all processors */ 1927 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr); 1928 offlens = ourlens + (rend-rstart); 1929 if (!rank) { 1930 ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1931 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1932 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr); 1933 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1934 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1935 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1936 } else { 1937 ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr); 1938 } 1939 1940 if (!rank) { 1941 /* calculate the number of nonzeros on each processor */ 1942 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 1943 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 1944 for (i=0; i<size; i++) { 1945 for (j=rowners[i]; j< rowners[i+1]; j++) { 1946 procsnz[i] += rowlengths[j]; 1947 } 1948 } 1949 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1950 1951 /* determine max buffer needed and allocate it */ 1952 maxnz = 0; 1953 for (i=0; i<size; i++) { 1954 maxnz = PetscMax(maxnz,procsnz[i]); 1955 } 1956 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1957 1958 /* read in my part of the matrix column indices */ 1959 nz = procsnz[0]; 1960 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1961 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1962 1963 /* read in every one elses and ship off */ 1964 for (i=1; i<size; i++) { 1965 nz = procsnz[i]; 
1966 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1967 ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 1968 } 1969 ierr = PetscFree(cols);CHKERRQ(ierr); 1970 } else { 1971 /* determine buffer space needed for message */ 1972 nz = 0; 1973 for (i=0; i<m; i++) { 1974 nz += ourlens[i]; 1975 } 1976 ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 1977 1978 /* receive message of column indices*/ 1979 ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 1980 ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 1981 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1982 } 1983 1984 /* determine column ownership if matrix is not square */ 1985 if (N != M) { 1986 n = N/size + ((N % size) > rank); 1987 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 1988 cstart = cend - n; 1989 } else { 1990 cstart = rstart; 1991 cend = rend; 1992 n = cend - cstart; 1993 } 1994 1995 /* loop over local rows, determining number of off diagonal entries */ 1996 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 1997 jj = 0; 1998 for (i=0; i<m; i++) { 1999 for (j=0; j<ourlens[i]; j++) { 2000 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2001 jj++; 2002 } 2003 } 2004 2005 /* create our matrix */ 2006 for (i=0; i<m; i++) { 2007 ourlens[i] -= offlens[i]; 2008 } 2009 ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr); 2010 ierr = MatSetType(A,type);CHKERRQ(ierr); 2011 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2012 2013 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2014 for (i=0; i<m; i++) { 2015 ourlens[i] += offlens[i]; 2016 } 2017 2018 if (!rank) { 2019 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2020 2021 /* read in my part of the matrix numerical values */ 2022 nz = procsnz[0]; 2023 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2024 2025 /* insert into matrix */ 2026 jj = rstart; 2027 smycols = mycols; 2028 svals = vals; 2029 for (i=0; i<m; i++) { 2030 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2031 smycols += ourlens[i]; 2032 svals += ourlens[i]; 2033 jj++; 2034 } 2035 2036 /* read in other processors and ship out */ 2037 for (i=1; i<size; i++) { 2038 nz = procsnz[i]; 2039 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2040 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2041 } 2042 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2043 } else { 2044 /* receive numeric values */ 2045 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2046 2047 /* receive message of values*/ 2048 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2049 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2050 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2051 2052 /* insert into matrix */ 2053 jj = rstart; 2054 smycols = mycols; 2055 svals = vals; 2056 for (i=0; i<m; i++) { 2057 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2058 smycols += ourlens[i]; 2059 svals += ourlens[i]; 2060 jj++; 2061 } 2062 } 2063 ierr = PetscFree(ourlens);CHKERRQ(ierr); 2064 ierr = PetscFree(vals);CHKERRQ(ierr); 2065 ierr = PetscFree(mycols);CHKERRQ(ierr); 2066 ierr = PetscFree(rowners);CHKERRQ(ierr); 2067 2068 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2069 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2070 *newmat = A; 
PetscFunctionReturn(0);
}

#undef __FUNCT__  
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix: first a local SeqAIJ
  on each processor, and then the end result obtained by concatenating those
  local matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
*/
PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            *local,M,Mreuse;
  PetscScalar    *vwork,*aa;
  MPI_Comm       comm = mat->comm;
  Mat_SeqAIJ     *aij;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
    }

    /* next, compute all the lengths */
    ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr);
    ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
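       (The MatAssemblyBegin()/MatAssemblyEnd() calls at the end of this routine
       restore the assembled flag.)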
2170 */ 2171 M->was_assembled = PETSC_TRUE; 2172 M->assembled = PETSC_FALSE; 2173 } 2174 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2175 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2176 ii = aij->i; 2177 jj = aij->j; 2178 aa = aij->a; 2179 for (i=0; i<m; i++) { 2180 row = rstart + i; 2181 nz = ii[i+1] - ii[i]; 2182 cwork = jj; jj += nz; 2183 vwork = aa; aa += nz; 2184 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2185 } 2186 2187 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2188 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2189 *newmat = M; 2190 2191 /* save submatrix used in processor for next request */ 2192 if (call == MAT_INITIAL_MATRIX) { 2193 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2194 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2195 } 2196 2197 PetscFunctionReturn(0); 2198 } 2199 2200 EXTERN_C_BEGIN 2201 #undef __FUNCT__ 2202 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 2203 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[]) 2204 { 2205 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2206 PetscInt m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d; 2207 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii; 2208 const PetscInt *JJ; 2209 PetscScalar *values; 2210 PetscErrorCode ierr; 2211 2212 PetscFunctionBegin; 2213 #if defined(PETSC_OPT_g) 2214 if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]); 2215 #endif 2216 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2217 o_nnz = d_nnz + m; 2218 2219 for (i=0; i<m; i++) { 2220 nnz = I[i+1]- I[i]; 2221 JJ = J + I[i]; 2222 nnz_max = PetscMax(nnz_max,nnz); 2223 #if defined(PETSC_OPT_g) 2224 if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz); 2225 #endif 2226 for (j=0; j<nnz; j++) { 2227 if (*JJ >= cstart) break; 2228 JJ++; 2229 } 2230 d = 0; 2231 for (; j<nnz; j++) { 2232 if (*JJ++ >= cend) break; 2233 d++; 2234 } 2235 d_nnz[i] = d; 2236 o_nnz[i] = nnz - d; 2237 } 2238 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2239 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2240 2241 if (v) values = (PetscScalar*)v; 2242 else { 2243 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 2244 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2245 } 2246 2247 ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2248 for (i=0; i<m; i++) { 2249 ii = i + rstart; 2250 nnz = I[i+1]- I[i]; 2251 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr); 2252 } 2253 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2254 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2255 ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr); 2256 2257 if (!v) { 2258 ierr = PetscFree(values);CHKERRQ(ierr); 2259 } 2260 PetscFunctionReturn(0); 2261 } 2262 EXTERN_C_END 2263 2264 #undef __FUNCT__ 2265 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 2266 /*@C 2267 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 2268 (the default parallel PETSc format). 2269 2270 Collective on MPI_Comm 2271 2272 Input Parameters: 2273 + A - the matrix 2274 . i - the indices into j for the start of each local row (starts with zero) 2275 . 
j - the column indices for each local row (starts with zero); these must be sorted for each row
-  v - optional values in the matrix

   Level: developer

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[],const PetscScalar v[])
{
  PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__  
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters 
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)) is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.  See the users manual for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion of
   the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence pre-allocation is perfect.
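   A minimal sketch of the corresponding calls, as seen from proc0 in the
   example above (comm and error handling are assumed to be set up as usual):
.vb
     Mat      B;
     PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};   /* proc0's rows above */
     ierr = MatCreate(comm,3,3,8,8,&B);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve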
   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__  
#define __FUNCT__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).
For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters 
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       it calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.

   Output Parameter:
.  A - the matrix

   Notes:
   The m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion of
   the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.
If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism:
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence pre-allocation is perfect.
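   A minimal sketch of the call made on proc0 for the example above (the
   other processes pass their own m, n, d_nnz and o_nnz):
.vb
     Mat      A;
     PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
     /* ... MatSetValues() calls, then MatAssemblyBegin()/MatAssemblyEnd() ... */
.ve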
2578 2579 Level: intermediate 2580 2581 .keywords: matrix, aij, compressed row, sparse, parallel 2582 2583 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 2584 MPIAIJ 2585 @*/ 2586 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 2587 { 2588 PetscErrorCode ierr; 2589 PetscMPIInt size; 2590 2591 PetscFunctionBegin; 2592 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2593 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2594 if (size > 1) { 2595 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2596 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2597 } else { 2598 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2599 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2600 } 2601 PetscFunctionReturn(0); 2602 } 2603 2604 #undef __FUNCT__ 2605 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2606 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 2607 { 2608 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2609 2610 PetscFunctionBegin; 2611 *Ad = a->A; 2612 *Ao = a->B; 2613 *colmap = a->garray; 2614 PetscFunctionReturn(0); 2615 } 2616 2617 #undef __FUNCT__ 2618 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2619 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2620 { 2621 PetscErrorCode ierr; 2622 PetscInt i; 2623 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2624 2625 PetscFunctionBegin; 2626 if (coloring->ctype == IS_COLORING_LOCAL) { 2627 ISColoringValue *allcolors,*colors; 2628 ISColoring ocoloring; 2629 2630 /* set coloring for diagonal portion */ 2631 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2632 2633 /* set coloring for off-diagonal portion */ 2634 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2635 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2636 for (i=0; i<a->B->n; i++) { 2637 colors[i] = allcolors[a->garray[i]]; 2638 } 2639 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2640 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2641 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2642 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2643 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2644 ISColoringValue *colors; 2645 PetscInt *larray; 2646 ISColoring ocoloring; 2647 2648 /* set coloring for diagonal portion */ 2649 ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2650 for (i=0; i<a->A->n; i++) { 2651 larray[i] = i + a->cstart; 2652 } 2653 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2654 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2655 for (i=0; i<a->A->n; i++) { 2656 colors[i] = coloring->colors[larray[i]]; 2657 } 2658 ierr = PetscFree(larray);CHKERRQ(ierr); 2659 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2660 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2661 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2662 2663 /* set coloring for off-diagonal portion */ 2664 ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 2665 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2666 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 
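    /* gather the colors of the ghost (off-diagonal) columns via the local index array larray */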
for (i=0; i<a->B->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__  
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__  
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__  
#define __FUNCT__ "MatMerge"
/*@C
   MatMerge - Creates a single large PETSc matrix by concatenating sequential
   matrices from each processor

   Collective on MPI_Comm

   Input Parameters:
+  comm - the communicator the parallel matrix will live on
.  inmat - the input sequential matrix (one per process)
.  n - number of local columns (or PETSC_DECIDE)
-  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.  outmat - the parallel matrix generated

   Level: advanced

   Notes: The number of columns of the matrix in EACH processor MUST be the same.
   The input matrix inmat is destroyed by this routine once outmat has been assembled.
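   Example usage (a minimal sketch, assuming every process has assembled a
   sequential matrix Aloc with the same number of columns; the names are
   illustrative only):
.vb
     Mat Aloc,Apar;
     /* ... create and assemble the local sequential matrix Aloc ... */
     ierr = MatMerge(PETSC_COMM_WORLD,Aloc,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Apar);CHKERRQ(ierr);
     /* Aloc has been destroyed by MatMerge(); use the parallel matrix Apar */
.ve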
2727 2728 @*/ 2729 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2730 { 2731 PetscErrorCode ierr; 2732 PetscInt m,N,i,rstart,nnz,I,*dnz,*onz; 2733 const PetscInt *indx; 2734 const PetscScalar *values; 2735 PetscMap columnmap,rowmap; 2736 2737 PetscFunctionBegin; 2738 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2739 /* 2740 PetscMPIInt rank; 2741 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2742 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N); 2743 */ 2744 if (scall == MAT_INITIAL_MATRIX){ 2745 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2746 if (n == PETSC_DECIDE){ 2747 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2748 ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr); 2749 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2750 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2751 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2752 } 2753 2754 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2755 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2756 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2757 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2758 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2759 2760 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2761 for (i=0;i<m;i++) { 2762 ierr = MatGetRow(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2763 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2764 ierr = MatRestoreRow(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 2765 } 2766 /* This routine will ONLY return MPIAIJ type matrix */ 2767 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr); 2768 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2769 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2770 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2771 2772 } else if (scall == MAT_REUSE_MATRIX){ 2773 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2774 } else { 2775 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 2776 } 2777 2778 for (i=0;i<m;i++) { 2779 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2780 I = i + rstart; 2781 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2782 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2783 } 2784 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2785 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2786 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2787 2788 PetscFunctionReturn(0); 2789 } 2790 2791 #undef __FUNCT__ 2792 #define __FUNCT__ "MatFileSplit" 2793 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2794 { 2795 PetscErrorCode ierr; 2796 PetscMPIInt rank; 2797 PetscInt m,N,i,rstart,nnz; 2798 size_t len; 2799 const PetscInt *indx; 2800 PetscViewer out; 2801 char *name; 2802 Mat B; 2803 const PetscScalar *values; 2804 2805 PetscFunctionBegin; 2806 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2807 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2808 /* Should this be the type of the diagonal block of A? 
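     For now MATSEQAIJ is hard-coded below; querying the type of the diagonal
     block (e.g. with MatGetType()) would preserve it.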
*/
  ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
  for (i=0;i<m;i++) {
    ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
    ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
  ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
  ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
  sprintf(name,"%s.%d",outfile,rank);
  ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
  ierr = MatView(B,out);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
  ierr = MatDestroy(B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
#undef __FUNCT__  
#define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
{
  PetscErrorCode       ierr;
  Mat_Merge_SeqsToMPI  *merge;
  PetscObjectContainer container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
    ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->bi);CHKERRQ(ierr);
    ierr = PetscFree(merge->bj);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
    ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr);
    if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);}
    if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);}
    if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);}

    ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
    /* free merge only after its contents have been released; if no container
       was attached there is nothing to free */
    ierr = PetscFree(merge);CHKERRQ(ierr);
  }

  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include "src/mat/utils/freespace.h"
#include "petscbt.h"
#undef __FUNCT__  
#define __FUNCT__ "MatMerge_SeqsToMPI"
/*@C
   MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
   matrices from each processor

   Collective on MPI_Comm

   Input Parameters:
+  comm - the communicator the parallel matrix will live on
.  seqmat - the input sequential matrix on each processor
.  m - number of local rows (or PETSC_DECIDE)
.  n - number of local columns (or PETSC_DECIDE)
-  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.  mpimat - the parallel matrix generated

   Level: advanced

   Notes:
   The dimensions of the sequential matrix in each processor MUST be the same.
   The input seqmat is stored in the container "Mat_Merge_SeqsToMPI", and will be
   destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
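   Example usage (a sketch of the symbolic/numeric split implemented below,
   assuming seqmat is the local sequential matrix on each process):
.vb
     Mat mpimat;
     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,&mpimat);CHKERRQ(ierr);
     ierr = MatMerge_SeqsToMPINumeric(seqmat,mpimat);CHKERRQ(ierr);
.ve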
2893 @*/ 2894 static PetscEvent logkey_seqstompinum = 0; 2895 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat) 2896 { 2897 PetscErrorCode ierr; 2898 MPI_Comm comm=mpimat->comm; 2899 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2900 PetscMPIInt size,rank,taga,*len_s; 2901 PetscInt N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j; 2902 PetscInt proc,m; 2903 PetscInt **buf_ri,**buf_rj; 2904 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 2905 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 2906 MPI_Request *s_waits,*r_waits; 2907 MPI_Status *status; 2908 MatScalar *aa=a->a,**abuf_r,*ba_i; 2909 Mat_Merge_SeqsToMPI *merge; 2910 PetscObjectContainer container; 2911 2912 PetscFunctionBegin; 2913 if (!logkey_seqstompinum) { 2914 ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE); 2915 } 2916 ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 2917 2918 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2919 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2920 2921 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 2922 if (container) { 2923 ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 2924 } 2925 bi = merge->bi; 2926 bj = merge->bj; 2927 buf_ri = merge->buf_ri; 2928 buf_rj = merge->buf_rj; 2929 2930 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 2931 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 2932 len_s = merge->len_s; 2933 2934 /* send and recv matrix values */ 2935 /*-----------------------------*/ 2936 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 2937 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 2938 2939 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 2940 for (proc=0,k=0; proc<size; proc++){ 2941 if (!len_s[proc]) continue; 2942 i = owners[proc]; 2943 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 2944 k++; 2945 } 2946 2947 ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr); 2948 ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr); 2949 ierr = PetscFree(status);CHKERRQ(ierr); 2950 2951 ierr = PetscFree(s_waits);CHKERRQ(ierr); 2952 ierr = PetscFree(r_waits);CHKERRQ(ierr); 2953 2954 /* insert mat values of mpimat */ 2955 /*----------------------------*/ 2956 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr); 2957 ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr); 2958 nextrow = buf_ri_k + merge->nrecv; 2959 nextai = nextrow + merge->nrecv; 2960 2961 for (k=0; k<merge->nrecv; k++){ 2962 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 2963 nrows = *(buf_ri_k[k]); 2964 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 2965 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 2966 } 2967 2968 /* set values of ba */ 2969 ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); 2970 for (i=0; i<m; i++) { 2971 arow = owners[rank] + i; 2972 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 2973 bnzi = bi[i+1] - bi[i]; 2974 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr); 2975 2976 /* add local non-zero vals of this proc's seqmat into ba */ 2977 anzi = ai[arow+1] - ai[arow]; 2978 aj = a->j + ai[arow]; 2979 aa = a->a + ai[arow]; 2980 nextaj = 0; 2981 for 
(j=0; nextaj<anzi; j++){ 2982 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 2983 ba_i[j] += aa[nextaj++]; 2984 } 2985 } 2986 2987 /* add received vals into ba */ 2988 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 2989 /* i-th row */ 2990 if (i == *nextrow[k]) { 2991 anzi = *(nextai[k]+1) - *nextai[k]; 2992 aj = buf_rj[k] + *(nextai[k]); 2993 aa = abuf_r[k] + *(nextai[k]); 2994 nextaj = 0; 2995 for (j=0; nextaj<anzi; j++){ 2996 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 2997 ba_i[j] += aa[nextaj++]; 2998 } 2999 } 3000 nextrow[k]++; nextai[k]++; 3001 } 3002 } 3003 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3004 } 3005 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3006 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3007 3008 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3009 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3010 ierr = PetscFree(buf_ri_k);CHKERRQ(ierr); 3011 ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3012 PetscFunctionReturn(0); 3013 } 3014 static PetscEvent logkey_seqstompisym = 0; 3015 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 3016 { 3017 PetscErrorCode ierr; 3018 Mat B_mpi; 3019 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 3020 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 3021 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 3022 PetscInt M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j; 3023 PetscInt len,proc,*dnz,*onz; 3024 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 3025 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 3026 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 3027 MPI_Status *status; 3028 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 3029 PetscBT lnkbt; 3030 Mat_Merge_SeqsToMPI *merge; 3031 PetscObjectContainer container; 3032 3033 PetscFunctionBegin; 3034 if (!logkey_seqstompisym) { 3035 ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE); 3036 } 3037 ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 3038 3039 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3040 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3041 3042 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 3043 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 3044 3045 /* determine row ownership */ 3046 /*---------------------------------------------------------*/ 3047 ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr); 3048 if (m == PETSC_DECIDE) { 3049 ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr); 3050 } else { 3051 ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 3052 } 3053 ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr); 3054 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 3055 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 3056 3057 if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); } 3058 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 3059 3060 /* determine the number of messages to send, their lengths */ 3061 /*---------------------------------------------------------*/ 3062 len_s = merge->len_s; 3063 3064 len = 0; /* length of buf_si[] */ 3065 merge->nsend = 0; 3066 for (proc=0; proc<size; proc++){ 3067 len_si[proc] = 0; 3068 if (proc == rank){ 3069 len_s[proc] = 0; 3070 } else { 3071 len_si[proc] = owners[proc+1] - owners[proc] + 1; 3072 
      len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      for (i=owners[proc]; i<owners[proc+1]; i++){
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*--------------------------------*/
  ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
  sj_waits = si_waits + merge->nsend;

  for (proc=0, k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:           nrows to be sent
         [1:nrows]:           row index (global)
         [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++){
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc];         /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);
  ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);
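  /* For example, if only the rows with local indices 2 and 5 (with 3 and 4
     nonzeros) go to [proc], the i-structure message just sent is
     {2, 2,5, 0,3,7}: the row count, the row indices, then CSR-style offsets */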
  ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++){
    ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree(si_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow = buf_ri_k + merge->nrecv;
  nextai  = nextrow + merge->nrecv;
  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len = 0;
  for (i=0;i<m;i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  if (n==PETSC_DECIDE) {
    ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr);
  }
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
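  /* dnz/onz, filled by MatPreallocateSet() above, hold per-row counts of the
     entries falling in the diagonal and off-diagonal blocks of the new layout */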
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = PETSC_NULL;
  merge->coj          = PETSC_NULL;
  merge->owners_co    = PETSC_NULL;

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  *mpimat = B_mpi;
  ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscEvent logkey_seqstompi = 0;
PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!logkey_seqstompi) {
    ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
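/*
   A minimal usage sketch for MatMerge_SeqsToMPI (sizes are illustrative only;
   each process contributes a SeqAIJ matrix with the same global dimensions,
   and overlapping entries are summed into the merged MPIAIJ matrix):

     Mat seq,par;
     ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,100,100,5,PETSC_NULL,&seq);CHKERRQ(ierr);
     ... set and assemble entries of seq ...
     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seq,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&par);CHKERRQ(ierr);
     ... later, after changing the numerical values of seq (same pattern) ...
     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seq,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&par);CHKERRQ(ierr);
*/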
static PetscEvent logkey_getlocalmat = 0;
#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMat"
/*@C
     MatGetLocalMat - Creates a SeqAIJ matrix that contains all the local rows of a parallel matrix

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

@*/
PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
  PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
  PetscScalar    *aa=a->a,*ba=b->a,*ca;
  PetscInt       am=A->m,i,j,k,ncols,cstart=mpimat->cstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;

  PetscFunctionBegin;
  if (!logkey_getlocalmat) {
    ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++){
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
    k = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      ncols   = ncols_d + ncols_o;
      /* off-diagonal portion of A, columns left of the diagonal block */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A, columns right of the diagonal block */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->freedata = PETSC_TRUE;
    mat->nonew    = 0;
  } else if (scall == MAT_REUSE_MATRIX){
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    ci = mat->i; cj = mat->j; ca = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *ca++ = *ba++; bj++; /* advance bj so cmap[*bj] tests the correct entry */
      }
      /* diagonal portion of A */
      ncols = ai[i+1] - ai[i];
      for (j=0; j<ncols; j++) *ca++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) { *ca++ = *ba++; bj++; }
    }
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
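/*
   Usage sketch (assumes A is an assembled MATMPIAIJ matrix):

     Mat A_loc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ... use A_loc; after A's values change (same nonzero pattern) ...
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/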
static PetscEvent logkey_getlocalmatcondensed = 0;
#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMatCondensed"
/*@C
     MatGetLocalMatCondensed - Creates a SeqAIJ matrix from the local rows of a parallel matrix, keeping only its nonzero columns

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or PETSC_NULL)

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

@*/
PetscErrorCode MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;

  PetscFunctionBegin;
  if (!logkey_getlocalmatcondensed) {
    ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row){
    start = a->rstart; end = a->rend;
    ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col){
    start = a->cstart;
    cmap  = a->garray;
    nzA   = a->A->n;
    nzB   = a->B->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt),&idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX){
    ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr = PetscFree(aloc);CHKERRQ(ierr);
  if (!row){
    ierr = ISDestroy(isrowa);CHKERRQ(ierr);
  }
  if (!col){
    ierr = ISDestroy(iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
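/* MatGetBrowsOfAcols() below builds the same ascending column set as above
   (off-diagonal columns below cstart, the local diagonal block, then the
   remaining off-diagonal columns) and uses it as the row set to extract from B. */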
static PetscEvent logkey_GetBrowsOfAcols = 0;
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix consisting of the rows of B that correspond to the nonzero columns of the local part of A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)

   Output Parameters:
+    rowb, colb - index sets of rows and columns of B to extract
.    brstart - row index of B_seq at which the B->m rows owned by this process begin
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq;

  PetscFunctionBegin;
  if (a->cstart != b->rstart || a->cend != b->rend){
    SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
  }
  if (!logkey_GetBrowsOfAcols) {
    ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    start = a->cstart;
    cmap  = a->garray;
    nzA   = a->A->n;
    nzB   = a->B->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt),&idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* off-diagonal columns below the local block */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* off-diagonal columns above the local block */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
    *brstart = imark;
    ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb){
    ierr = ISDestroy(isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb){
    ierr = ISDestroy(iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
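/* MatGetBrowsOfAoCols() below is a leaner variant: it fetches only the B rows
   matching the off-diagonal columns of A, and communicates them directly
   through A's existing vector-scatter context instead of MatGetSubMatrices(). */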
static PetscEvent logkey_GetBrowsOfAocols = 0;
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols"
/*@C
    MatGetBrowsOfAoCols - Creates a SeqAIJ matrix consisting of the rows of B that correspond to the nonzero columns
    of the OFF-DIAGONAL portion of the local part of A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
.    startsj - starting points in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
-    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)

   Output Parameter:
.    B_oth - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
{
  VecScatter_MPI_General *gen_to,*gen_from;
  PetscErrorCode         ierr;
  Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
  Mat_SeqAIJ             *b_oth;
  VecScatter             ctx=a->Mvctx;
  MPI_Comm               comm=ctx->comm;
  PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
  PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj;
  PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
  PetscInt               i,j,k,l,nrecvs,nsends,nrows,*rrow,*srow,*rstarts,*rstartsj,*sstarts,*sstartsj,len;
  MPI_Request            *rwaits,*swaits;
  MPI_Status             *sstatus,rstatus;
  const PetscInt         *cols;
  const PetscScalar      *vals;

  PetscFunctionBegin;
  if (a->cstart != b->rstart || a->cend != b->rend){
    SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
  }
  if (!logkey_GetBrowsOfAocols) {
    ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  gen_to   = (VecScatter_MPI_General*)ctx->todata;
  gen_from = (VecScatter_MPI_General*)ctx->fromdata;
  rvalues  = gen_from->values; /* reused to hold the lengths of the rows to be received */
  svalues  = gen_to->values;   /* reused to hold the lengths of the rows to be sent */
  nrecvs   = gen_from->n;
  nsends   = gen_to->n;
  rwaits   = gen_from->requests;
  swaits   = gen_to->requests;
  rrow     = gen_from->indices; /* local row indices to be received */
  srow     = gen_to->indices;   /* local row indices to be sent */
  rstarts  = gen_from->starts;
  sstarts  = gen_to->starts;
  rprocs   = gen_from->procs;
  sprocs   = gen_to->procs;
  sstatus  = gen_to->sstatus;

  if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX){
    /* i-array */
    /*---------*/
    /* post receives */
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i];
      nrows  = rstarts[i+1]-rstarts[i];
      ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message */
    ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
    rstartsj = sstartsj + nsends + 1;
    sstartsj[0] = 0; rstartsj[0] = 0;
    len = 0; /* total length of j or a array to be sent */
    k = 0;
    for (i=0; i<nsends; i++){
      rowlen = (PetscInt*)svalues + sstarts[i];
      nrows  = sstarts[i+1]-sstarts[i]; /* num of rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + b->rowners[rank]; /* global row idx */
        ierr = MatGetRow(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* row length */
        len += rowlen[j];
        ierr = MatRestoreRow(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
      sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
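    /* Note: the scatter context's request arrays, process lists, and value
       buffers are reused above for raw MPI messages, so no new communication
       pattern has to be set up for the row-length exchange. */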
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {
      ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
    }
    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
    b_othi[0] = 0;
    len = 0; /* total length of j or a array to be received */
    k = 0;
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i];
      nrows  = rstarts[i+1]-rstarts[i];
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++){
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }
    k = 0;
    for (i=0; i<nsends; i++){
      nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row  = srow[k++] + b->rowners[rank]; /* global row idx */
        ierr = MatGetRow(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
        for (l=0; l<ncols; l++){
          *bufJ++ = cols[l];
        }
        ierr = MatRestoreRow(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {
      ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
    }
  } else if (scall == MAT_REUSE_MATRIX){
    sstartsj = *startsj;
    rstartsj = sstartsj + nsends + 1;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++){
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }
  k = 0;
  for (i=0; i<nsends; i++){
    nrows = sstarts[i+1]-sstarts[i];
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row  = srow[k++] + b->rowners[rank]; /* global row idx */
      ierr = MatGetRow(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
      for (l=0; l<ncols; l++){
        *bufA++ = vals[l];
      }
      ierr = MatRestoreRow(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {
    ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
  }

  if (scall == MAT_INITIAL_MATRIX){
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
    b_oth->freedata = PETSC_TRUE;
    b_oth->nonew    = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj || !bufa_ptr){
      ierr = PetscFree(sstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
    } else {
      *startsj  = sstartsj;
      *bufa_ptr = bufa;
    }
  }
  ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJ
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscInt       i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);

  ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->factor    = 0;
  B->assembled = PETSC_FALSE;
  B->mapping   = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size = size;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);

  /* the information in the maps duplicates the information computed below;
     eventually we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
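  /* lvec holds the gathered off-process vector entries needed by the
     off-diagonal block B; Mvctx is the scatter that fills it during MatMult().
     Both are created when the matrix is assembled. */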
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* Explicitly create 2 MATSEQAIJ matrices. */
  ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
  ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->A);
  ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->B);

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPIAIJ",
                                           MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPIAIJ",
                                           MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIAIJ",
                                           MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                           "MatIsTranspose_MPIAIJ",
                                           MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                           "MatMPIAIJSetPreallocation_MPIAIJ",
                                           MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
                                           "MatMPIAIJSetPreallocationCSR_MPIAIJ",
                                           MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                           "MatDiagonalScaleLocal_MPIAIJ",
                                           MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
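/*
   A minimal sketch of creating a MATMPIAIJ matrix through the public API
   (the sizes and preallocation counts are illustrative, not recommendations):

     Mat            A;
     PetscErrorCode ierr;
     ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
     ... call MatSetValues() on locally owned rows ...
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatDestroy(A);CHKERRQ(ierr);
*/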