#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/inline/spops.h"

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order-N integer array) but access is fast.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  int            n = aij->B->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int         new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) +
sizeof(PetscScalar))); \ 95 a->maxnz += CHUNKSIZE; \ 96 a->reallocs++; \ 97 } \ 98 N = nrow++ - 1; a->nz++; \ 99 /* shift up all the later entries in this row */ \ 100 for (ii=N; ii>=_i; ii--) { \ 101 rp[ii+1] = rp[ii]; \ 102 ap[ii+1] = ap[ii]; \ 103 } \ 104 rp[_i] = col1; \ 105 ap[_i] = value; \ 106 a_noinsert: ; \ 107 ailen[row] = nrow; \ 108 } 109 110 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 111 { \ 112 \ 113 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 114 rmax = bimax[row]; nrow = bilen[row]; \ 115 col1 = col - shift; \ 116 \ 117 low = 0; high = nrow; \ 118 while (high-low > 5) { \ 119 t = (low+high)/2; \ 120 if (rp[t] > col) high = t; \ 121 else low = t; \ 122 } \ 123 for (_i=low; _i<high; _i++) { \ 124 if (rp[_i] > col1) break; \ 125 if (rp[_i] == col1) { \ 126 if (addv == ADD_VALUES) ap[_i] += value; \ 127 else ap[_i] = value; \ 128 goto b_noinsert; \ 129 } \ 130 } \ 131 if (nonew == 1) goto b_noinsert; \ 132 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 133 if (nrow >= rmax) { \ 134 /* there is no extra room in row, therefore enlarge */ \ 135 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 136 PetscScalar *new_a; \ 137 \ 138 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 139 \ 140 /* malloc new storage space */ \ 141 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \ 142 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 143 new_j = (int*)(new_a + new_nz); \ 144 new_i = new_j + new_nz; \ 145 \ 146 /* copy over old data into new slots */ \ 147 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 148 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 149 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 150 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 151 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 152 len*sizeof(int));CHKERRQ(ierr); \ 153 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 154 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 155 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 156 /* free up old matrix storage */ \ 157 \ 158 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 159 if (!b->singlemalloc) { \ 160 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 161 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 162 } \ 163 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 164 b->singlemalloc = PETSC_TRUE; \ 165 \ 166 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 167 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 168 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 169 b->maxnz += CHUNKSIZE; \ 170 b->reallocs++; \ 171 } \ 172 N = nrow++ - 1; b->nz++; \ 173 /* shift up all the later entries in this row */ \ 174 for (ii=N; ii>=_i; ii--) { \ 175 rp[ii+1] = rp[ii]; \ 176 ap[ii+1] = ap[ii]; \ 177 } \ 178 rp[_i] = col1; \ 179 ap[_i] = value; \ 180 b_noinsert: ; \ 181 bilen[row] = nrow; \ 182 } 183 184 #undef __FUNCT__ 185 #define __FUNCT__ "MatSetValues_MPIAIJ" 186 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 187 { 188 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 189 PetscScalar value; 190 PetscErrorCode ierr; 191 int i,j,rstart = aij->rstart,rend = aij->rend; 192 int cstart = aij->cstart,cend = aij->cend,row,col; 193 PetscTruth roworiented = aij->roworiented; 
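  /*
     Overview of the logic below (an illustrative sketch only, reusing the field names
     declared in this routine; the actual handling, including the DisAssemble path for a
     new off-process column, is in the loop that follows).  Each (row,column) pair is
     routed three ways: locally owned rows with columns in [cstart,cend) go into the
     diagonal block aij->A; locally owned rows with off-process columns go into aij->B
     after translating the global column through colmap/garray; rows owned by another
     process are stashed and communicated at assembly time.  Roughly,

         if (in[j] >= cstart && in[j] < cend) {
           col = in[j] - cstart;            // local column of the diagonal block aij->A
         } else if (mat->was_assembled) {
           col = aij->colmap[in[j]] - 1;    // colmap stores local index + 1
                                            // (PetscTableFind() when PETSC_USE_CTABLE)
         } else {
           col = in[j];                     // before first assembly aij->B keeps global columns
         }
  */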
194 195 /* Some Variables required in the macro */ 196 Mat A = aij->A; 197 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 198 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 199 PetscScalar *aa = a->a; 200 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 201 Mat B = aij->B; 202 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 203 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 204 PetscScalar *ba = b->a; 205 206 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 207 int nonew = a->nonew,shift=0; 208 PetscScalar *ap; 209 210 PetscFunctionBegin; 211 for (i=0; i<m; i++) { 212 if (im[i] < 0) continue; 213 #if defined(PETSC_USE_BOPT_g) 214 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 215 #endif 216 if (im[i] >= rstart && im[i] < rend) { 217 row = im[i] - rstart; 218 for (j=0; j<n; j++) { 219 if (in[j] >= cstart && in[j] < cend){ 220 col = in[j] - cstart; 221 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 222 if (ignorezeroentries && value == 0.0) continue; 223 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 224 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 225 } else if (in[j] < 0) continue; 226 #if defined(PETSC_USE_BOPT_g) 227 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);} 228 #endif 229 else { 230 if (mat->was_assembled) { 231 if (!aij->colmap) { 232 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 233 } 234 #if defined (PETSC_USE_CTABLE) 235 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 236 col--; 237 #else 238 col = aij->colmap[in[j]] - 1; 239 #endif 240 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 241 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 242 col = in[j]; 243 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 244 B = aij->B; 245 b = (Mat_SeqAIJ*)B->data; 246 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 247 ba = b->a; 248 } 249 } else col = in[j]; 250 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 251 if (ignorezeroentries && value == 0.0) continue; 252 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 253 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 254 } 255 } 256 } else { 257 if (!aij->donotstash) { 258 if (roworiented) { 259 if (ignorezeroentries && v[i*n] == 0.0) continue; 260 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 261 } else { 262 if (ignorezeroentries && v[i] == 0.0) continue; 263 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 264 } 265 } 266 } 267 } 268 PetscFunctionReturn(0); 269 } 270 271 #undef __FUNCT__ 272 #define __FUNCT__ "MatGetValues_MPIAIJ" 273 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[]) 274 { 275 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 276 PetscErrorCode ierr; 277 int i,j,rstart = aij->rstart,rend = aij->rend; 278 int cstart = aij->cstart,cend = aij->cend,row,col; 279 280 PetscFunctionBegin; 281 for (i=0; i<m; i++) { 282 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]); 283 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1); 284 if (idxm[i] >= rstart && idxm[i] < rend) { 285 row = idxm[i] - rstart; 286 for (j=0; j<n; j++) { 287 if (idxn[j] < 0) 
SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]); 288 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1); 289 if (idxn[j] >= cstart && idxn[j] < cend){ 290 col = idxn[j] - cstart; 291 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 292 } else { 293 if (!aij->colmap) { 294 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 295 } 296 #if defined (PETSC_USE_CTABLE) 297 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 298 col --; 299 #else 300 col = aij->colmap[idxn[j]] - 1; 301 #endif 302 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 303 else { 304 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 305 } 306 } 307 } 308 } else { 309 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 310 } 311 } 312 PetscFunctionReturn(0); 313 } 314 315 #undef __FUNCT__ 316 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 317 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 318 { 319 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 320 PetscErrorCode ierr; 321 int nstash,reallocs; 322 InsertMode addv; 323 324 PetscFunctionBegin; 325 if (aij->donotstash) { 326 PetscFunctionReturn(0); 327 } 328 329 /* make sure all processors are either in INSERTMODE or ADDMODE */ 330 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 331 if (addv == (ADD_VALUES|INSERT_VALUES)) { 332 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 333 } 334 mat->insertmode = addv; /* in case this processor had no cache */ 335 336 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 337 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 338 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 339 PetscFunctionReturn(0); 340 } 341 342 343 #undef __FUNCT__ 344 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 345 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 346 { 347 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 348 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 349 PetscErrorCode ierr; 350 int i,j,rstart,ncols,n,flg; 351 int *row,*col,other_disassembled; 352 PetscScalar *val; 353 InsertMode addv = mat->insertmode; 354 355 PetscFunctionBegin; 356 if (!aij->donotstash) { 357 while (1) { 358 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 359 if (!flg) break; 360 361 for (i=0; i<n;) { 362 /* Now identify the consecutive vals belonging to the same row */ 363 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 364 if (j < n) ncols = j-i; 365 else ncols = n-i; 366 /* Now assemble all these values with a single function call */ 367 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 368 i = j; 369 } 370 } 371 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 372 } 373 374 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 375 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 376 377 /* determine if any processor has disassembled, if so we must 378 also disassemble ourselfs, in order that we may reassemble. 
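     (each process contributes its was_assembled flag, 0 or 1, to the MPI_PROD
      reduction below; the product is 1 only when every process is still assembled,
      so a single process that has disassembled forces the others to disassemble as
      well before reassembling)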
*/ 379 /* 380 if nonzero structure of submatrix B cannot change then we know that 381 no processor disassembled thus we can skip this stuff 382 */ 383 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 384 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 385 if (mat->was_assembled && !other_disassembled) { 386 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 387 } 388 } 389 390 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 391 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 392 } 393 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 394 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 395 396 if (aij->rowvalues) { 397 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 398 aij->rowvalues = 0; 399 } 400 401 /* used by MatAXPY() */ 402 a->xtoy = 0; b->xtoy = 0; 403 a->XtoY = 0; b->XtoY = 0; 404 405 PetscFunctionReturn(0); 406 } 407 408 #undef __FUNCT__ 409 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 410 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 411 { 412 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 413 PetscErrorCode ierr; 414 415 PetscFunctionBegin; 416 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 417 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 418 PetscFunctionReturn(0); 419 } 420 421 #undef __FUNCT__ 422 #define __FUNCT__ "MatZeroRows_MPIAIJ" 423 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag) 424 { 425 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 426 PetscErrorCode ierr; 427 int i,N,*rows,*owners = l->rowners,size = l->size; 428 int *nprocs,j,idx,nsends,row; 429 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 430 int *rvalues,tag = A->tag,count,base,slen,n,*source; 431 int *lens,imdex,*lrows,*values,rstart=l->rstart; 432 MPI_Comm comm = A->comm; 433 MPI_Request *send_waits,*recv_waits; 434 MPI_Status recv_status,*send_status; 435 IS istmp; 436 PetscTruth found; 437 438 PetscFunctionBegin; 439 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 440 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 441 442 /* first count number of contributors to each processor */ 443 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 444 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 445 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 446 for (i=0; i<N; i++) { 447 idx = rows[i]; 448 found = PETSC_FALSE; 449 for (j=0; j<size; j++) { 450 if (idx >= owners[j] && idx < owners[j+1]) { 451 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 452 } 453 } 454 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 455 } 456 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 457 458 /* inform other processors of number of messages and max length*/ 459 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 460 461 /* post receives: */ 462 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 463 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 464 for (i=0; i<nrecvs; i++) { 465 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 466 } 467 468 /* do sends: 469 1) starts[i] gives the starting index in svalues for stuff going to 470 the ith processor 471 */ 472 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 473 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 474 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 475 starts[0] = 0; 476 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 477 
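  /* after this prefix sum, starts[i] is the offset in svalues where process i's row
     indices begin; the loop below drops each global row into its owner's slot and
     increments starts[owner[i]] as it goes, which is why starts[] is rebuilt from
     nprocs[] again before the sends are posted */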
for (i=0; i<N; i++) { 478 svalues[starts[owner[i]]++] = rows[i]; 479 } 480 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 481 482 starts[0] = 0; 483 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 484 count = 0; 485 for (i=0; i<size; i++) { 486 if (nprocs[2*i+1]) { 487 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 488 } 489 } 490 ierr = PetscFree(starts);CHKERRQ(ierr); 491 492 base = owners[rank]; 493 494 /* wait on receives */ 495 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 496 source = lens + nrecvs; 497 count = nrecvs; slen = 0; 498 while (count) { 499 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 500 /* unpack receives into our local space */ 501 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 502 source[imdex] = recv_status.MPI_SOURCE; 503 lens[imdex] = n; 504 slen += n; 505 count--; 506 } 507 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 508 509 /* move the data into the send scatter */ 510 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 511 count = 0; 512 for (i=0; i<nrecvs; i++) { 513 values = rvalues + i*nmax; 514 for (j=0; j<lens[i]; j++) { 515 lrows[count++] = values[j] - base; 516 } 517 } 518 ierr = PetscFree(rvalues);CHKERRQ(ierr); 519 ierr = PetscFree(lens);CHKERRQ(ierr); 520 ierr = PetscFree(owner);CHKERRQ(ierr); 521 ierr = PetscFree(nprocs);CHKERRQ(ierr); 522 523 /* actually zap the local rows */ 524 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 525 PetscLogObjectParent(A,istmp); 526 527 /* 528 Zero the required rows. If the "diagonal block" of the matrix 529 is square and the user wishes to set the diagonal we use seperate 530 code so that MatSetValues() is not called for each diagonal allocating 531 new memory, thus calling lots of mallocs and slowing things down. 
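     In the rectangular case the global diagonal entry of a zeroed row may fall in
     the off-diagonal block l->B rather than in l->A, so those entries are inserted
     with MatSetValues() on the parallel matrix and the matrix is reassembled; this
     is also why l->B is zeroed before l->A below.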
532 533 Contributed by: Mathew Knepley 534 */ 535 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 536 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 537 if (diag && (l->A->M == l->A->N)) { 538 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 539 } else if (diag) { 540 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 541 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 542 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 543 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 544 } 545 for (i = 0; i < slen; i++) { 546 row = lrows[i] + rstart; 547 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 548 } 549 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 550 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 551 } else { 552 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 553 } 554 ierr = ISDestroy(istmp);CHKERRQ(ierr); 555 ierr = PetscFree(lrows);CHKERRQ(ierr); 556 557 /* wait on sends */ 558 if (nsends) { 559 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 560 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 561 ierr = PetscFree(send_status);CHKERRQ(ierr); 562 } 563 ierr = PetscFree(send_waits);CHKERRQ(ierr); 564 ierr = PetscFree(svalues);CHKERRQ(ierr); 565 566 PetscFunctionReturn(0); 567 } 568 569 #undef __FUNCT__ 570 #define __FUNCT__ "MatMult_MPIAIJ" 571 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 572 { 573 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 574 PetscErrorCode ierr; 575 int nt; 576 577 PetscFunctionBegin; 578 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 579 if (nt != A->n) { 580 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt); 581 } 582 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 583 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 584 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 585 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 586 PetscFunctionReturn(0); 587 } 588 589 #undef __FUNCT__ 590 #define __FUNCT__ "MatMultAdd_MPIAIJ" 591 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 592 { 593 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 594 PetscErrorCode ierr; 595 596 PetscFunctionBegin; 597 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 598 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 599 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 600 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 601 PetscFunctionReturn(0); 602 } 603 604 #undef __FUNCT__ 605 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 606 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 607 { 608 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 609 PetscErrorCode ierr; 610 611 PetscFunctionBegin; 612 /* do nondiagonal part */ 613 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 614 /* send it on its way */ 615 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 616 /* do local part */ 617 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 618 /* receive remote parts: note this assumes the values are not actually */ 619 /* inserted in yy until the next line, which is true for my implementation*/ 620 /* but is not perhaps always true. 
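        (concretely: the local product yy = A_loc^T xx is computed while the
        off-process contributions B_loc^T xx, already stored in a->lvec, are in
        flight; the reverse ADD_VALUES scatter then accumulates them into the
        owning processes' entries of yy)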
*/ 621 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 622 PetscFunctionReturn(0); 623 } 624 625 EXTERN_C_BEGIN 626 #undef __FUNCT__ 627 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 628 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth tol,PetscTruth *f) 629 { 630 MPI_Comm comm; 631 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 632 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 633 IS Me,Notme; 634 PetscErrorCode ierr; 635 int M,N,first,last,*notme,ntids,i; 636 637 PetscFunctionBegin; 638 639 /* Easy test: symmetric diagonal block */ 640 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 641 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 642 if (!*f) PetscFunctionReturn(0); 643 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 644 ierr = MPI_Comm_size(comm,&ntids);CHKERRQ(ierr); 645 if (ntids==1) PetscFunctionReturn(0); 646 647 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 648 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 649 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 650 ierr = PetscMalloc((N-last+first)*sizeof(int),¬me);CHKERRQ(ierr); 651 for (i=0; i<first; i++) notme[i] = i; 652 for (i=last; i<M; i++) notme[i-last+first] = i; 653 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 654 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 655 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 656 Aoff = Aoffs[0]; 657 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 658 Boff = Boffs[0]; 659 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 660 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 661 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 662 ierr = ISDestroy(Me);CHKERRQ(ierr); 663 ierr = ISDestroy(Notme);CHKERRQ(ierr); 664 665 PetscFunctionReturn(0); 666 } 667 EXTERN_C_END 668 669 #undef __FUNCT__ 670 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 671 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 672 { 673 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 674 PetscErrorCode ierr; 675 676 PetscFunctionBegin; 677 /* do nondiagonal part */ 678 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 679 /* send it on its way */ 680 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 681 /* do local part */ 682 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 683 /* receive remote parts: note this assumes the values are not actually */ 684 /* inserted in yy until the next line, which is true for my implementation*/ 685 /* but is not perhaps always true. 
*/ 686 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 687 PetscFunctionReturn(0); 688 } 689 690 /* 691 This only works correctly for square matrices where the subblock A->A is the 692 diagonal block 693 */ 694 #undef __FUNCT__ 695 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 696 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 697 { 698 PetscErrorCode ierr; 699 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 700 701 PetscFunctionBegin; 702 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 703 if (a->rstart != a->cstart || a->rend != a->cend) { 704 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 705 } 706 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 707 PetscFunctionReturn(0); 708 } 709 710 #undef __FUNCT__ 711 #define __FUNCT__ "MatScale_MPIAIJ" 712 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A) 713 { 714 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 715 PetscErrorCode ierr; 716 717 PetscFunctionBegin; 718 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 719 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 720 PetscFunctionReturn(0); 721 } 722 723 #undef __FUNCT__ 724 #define __FUNCT__ "MatDestroy_MPIAIJ" 725 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 726 { 727 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 728 PetscErrorCode ierr; 729 730 PetscFunctionBegin; 731 #if defined(PETSC_USE_LOG) 732 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 733 #endif 734 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 735 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 736 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 737 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 738 #if defined (PETSC_USE_CTABLE) 739 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 740 #else 741 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 742 #endif 743 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 744 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 745 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 746 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 747 ierr = PetscFree(aij);CHKERRQ(ierr); 748 749 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 750 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 751 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 752 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 753 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 754 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 755 PetscFunctionReturn(0); 756 } 757 758 #undef __FUNCT__ 759 #define __FUNCT__ "MatView_MPIAIJ_Binary" 760 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 761 { 762 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 763 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 764 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 765 PetscErrorCode ierr; 766 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 767 int fd; 768 PetscInt nz,header[4],*row_lengths,*range,rlen,i; 769 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 770 PetscScalar *column_values; 771 772 PetscFunctionBegin; 773 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 774 ierr = 
MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 775 nz = A->nz + B->nz; 776 if (!rank) { 777 header[0] = MAT_FILE_COOKIE; 778 header[1] = mat->M; 779 header[2] = mat->N; 780 ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 781 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 782 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 783 /* get largest number of rows any processor has */ 784 rlen = mat->m; 785 ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr); 786 for (i=1; i<size; i++) { 787 rlen = PetscMax(rlen,range[i+1] - range[i]); 788 } 789 } else { 790 ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 791 rlen = mat->m; 792 } 793 794 /* load up the local row counts */ 795 ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr); 796 for (i=0; i<mat->m; i++) { 797 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 798 } 799 800 /* store the row lengths to the file */ 801 if (!rank) { 802 MPI_Status status; 803 ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 804 for (i=1; i<size; i++) { 805 rlen = range[i+1] - range[i]; 806 ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 807 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 808 } 809 } else { 810 ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 811 } 812 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 813 814 /* load up the local column indices */ 815 nzmax = nz; /* )th processor needs space a largest processor needs */ 816 ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr); 817 ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr); 818 cnt = 0; 819 for (i=0; i<mat->m; i++) { 820 for (j=B->i[i]; j<B->i[i+1]; j++) { 821 if ( (col = garray[B->j[j]]) > cstart) break; 822 column_indices[cnt++] = col; 823 } 824 for (k=A->i[i]; k<A->i[i+1]; k++) { 825 column_indices[cnt++] = A->j[k] + cstart; 826 } 827 for (; j<B->i[i+1]; j++) { 828 column_indices[cnt++] = garray[B->j[j]]; 829 } 830 } 831 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 832 833 /* store the column indices to the file */ 834 if (!rank) { 835 MPI_Status status; 836 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 837 for (i=1; i<size; i++) { 838 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 839 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 840 ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 841 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 842 } 843 } else { 844 ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 845 ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 846 } 847 ierr = PetscFree(column_indices);CHKERRQ(ierr); 848 849 /* load up the local column values */ 850 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 851 cnt = 0; 852 for (i=0; i<mat->m; i++) { 853 for (j=B->i[i]; j<B->i[i+1]; j++) { 854 if ( garray[B->j[j]] > cstart) break; 855 column_values[cnt++] = B->a[j]; 856 } 857 for (k=A->i[i]; k<A->i[i+1]; k++) { 858 column_values[cnt++] = A->a[k]; 859 } 860 for (; j<B->i[i+1]; j++) { 861 column_values[cnt++] = B->a[j]; 862 } 863 } 864 if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt 
= %d nz = %d",cnt,A->nz+B->nz); 865 866 /* store the column values to the file */ 867 if (!rank) { 868 MPI_Status status; 869 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 870 for (i=1; i<size; i++) { 871 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 872 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 873 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 874 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 875 } 876 } else { 877 ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 878 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 879 } 880 ierr = PetscFree(column_values);CHKERRQ(ierr); 881 PetscFunctionReturn(0); 882 } 883 884 #undef __FUNCT__ 885 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 886 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 887 { 888 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 889 PetscErrorCode ierr; 890 PetscMPIInt rank = aij->rank,size = aij->size; 891 PetscTruth isdraw,iascii,flg,isbinary; 892 PetscViewer sviewer; 893 PetscViewerFormat format; 894 895 PetscFunctionBegin; 896 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 897 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 898 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 899 if (iascii) { 900 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 901 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 902 MatInfo info; 903 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 904 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 905 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 906 if (flg) { 907 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 908 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 909 } else { 910 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 911 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 912 } 913 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 914 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 915 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 916 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 917 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 918 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 919 PetscFunctionReturn(0); 920 } else if (format == PETSC_VIEWER_ASCII_INFO) { 921 PetscFunctionReturn(0); 922 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 923 PetscFunctionReturn(0); 924 } 925 } else if (isbinary) { 926 if (size == 1) { 927 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 928 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 929 } else { 930 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 931 } 932 PetscFunctionReturn(0); 933 } else if (isdraw) { 934 PetscDraw draw; 935 PetscTruth isnull; 936 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 937 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) 
PetscFunctionReturn(0); 938 } 939 940 if (size == 1) { 941 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 942 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 943 } else { 944 /* assemble the entire matrix onto first processor. */ 945 Mat A; 946 Mat_SeqAIJ *Aloc; 947 int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 948 PetscScalar *a; 949 950 if (!rank) { 951 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 952 } else { 953 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 954 } 955 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 956 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 957 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 958 PetscLogObjectParent(mat,A); 959 960 /* copy over the A part */ 961 Aloc = (Mat_SeqAIJ*)aij->A->data; 962 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 963 row = aij->rstart; 964 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 965 for (i=0; i<m; i++) { 966 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 967 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 968 } 969 aj = Aloc->j; 970 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 971 972 /* copy over the B part */ 973 Aloc = (Mat_SeqAIJ*)aij->B->data; 974 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 975 row = aij->rstart; 976 ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr); 977 ct = cols; 978 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 979 for (i=0; i<m; i++) { 980 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 981 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 982 } 983 ierr = PetscFree(ct);CHKERRQ(ierr); 984 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 985 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 986 /* 987 Everyone has to call to draw the matrix since the graphics waits are 988 synchronized across all processors that share the PetscDraw object 989 */ 990 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 991 if (!rank) { 992 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 993 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 994 } 995 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 996 ierr = MatDestroy(A);CHKERRQ(ierr); 997 } 998 PetscFunctionReturn(0); 999 } 1000 1001 #undef __FUNCT__ 1002 #define __FUNCT__ "MatView_MPIAIJ" 1003 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1004 { 1005 PetscErrorCode ierr; 1006 PetscTruth iascii,isdraw,issocket,isbinary; 1007 1008 PetscFunctionBegin; 1009 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1010 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 1011 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1012 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 1013 if (iascii || isdraw || isbinary || issocket) { 1014 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1015 } else { 1016 SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1017 } 1018 PetscFunctionReturn(0); 1019 } 1020 1021 1022 1023 #undef __FUNCT__ 1024 #define __FUNCT__ "MatRelax_MPIAIJ" 1025 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1026 { 1027 Mat_MPIAIJ *mat = 
(Mat_MPIAIJ*)matin->data; 1028 PetscErrorCode ierr; 1029 Vec bb1; 1030 PetscScalar mone=-1.0; 1031 1032 PetscFunctionBegin; 1033 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1034 1035 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1036 1037 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1038 if (flag & SOR_ZERO_INITIAL_GUESS) { 1039 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1040 its--; 1041 } 1042 1043 while (its--) { 1044 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1045 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1046 1047 /* update rhs: bb1 = bb - B*x */ 1048 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1049 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1050 1051 /* local sweep */ 1052 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1053 CHKERRQ(ierr); 1054 } 1055 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1056 if (flag & SOR_ZERO_INITIAL_GUESS) { 1057 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1058 its--; 1059 } 1060 while (its--) { 1061 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1062 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1063 1064 /* update rhs: bb1 = bb - B*x */ 1065 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1066 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1067 1068 /* local sweep */ 1069 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1070 CHKERRQ(ierr); 1071 } 1072 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1073 if (flag & SOR_ZERO_INITIAL_GUESS) { 1074 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1075 its--; 1076 } 1077 while (its--) { 1078 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1079 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1080 1081 /* update rhs: bb1 = bb - B*x */ 1082 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1083 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1084 1085 /* local sweep */ 1086 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1087 CHKERRQ(ierr); 1088 } 1089 } else { 1090 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1091 } 1092 1093 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1094 PetscFunctionReturn(0); 1095 } 1096 1097 #undef __FUNCT__ 1098 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1099 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1100 { 1101 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1102 Mat A = mat->A,B = mat->B; 1103 PetscErrorCode ierr; 1104 PetscReal isend[5],irecv[5]; 1105 1106 PetscFunctionBegin; 1107 info->block_size = 1.0; 1108 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1109 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1110 isend[3] = info->memory; isend[4] = info->mallocs; 1111 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1112 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1113 isend[3] += info->memory; isend[4] += info->mallocs; 1114 if (flag == MAT_LOCAL) { 1115 
info->nz_used = isend[0]; 1116 info->nz_allocated = isend[1]; 1117 info->nz_unneeded = isend[2]; 1118 info->memory = isend[3]; 1119 info->mallocs = isend[4]; 1120 } else if (flag == MAT_GLOBAL_MAX) { 1121 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1122 info->nz_used = irecv[0]; 1123 info->nz_allocated = irecv[1]; 1124 info->nz_unneeded = irecv[2]; 1125 info->memory = irecv[3]; 1126 info->mallocs = irecv[4]; 1127 } else if (flag == MAT_GLOBAL_SUM) { 1128 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1129 info->nz_used = irecv[0]; 1130 info->nz_allocated = irecv[1]; 1131 info->nz_unneeded = irecv[2]; 1132 info->memory = irecv[3]; 1133 info->mallocs = irecv[4]; 1134 } 1135 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1136 info->fill_ratio_needed = 0; 1137 info->factor_mallocs = 0; 1138 info->rows_global = (double)matin->M; 1139 info->columns_global = (double)matin->N; 1140 info->rows_local = (double)matin->m; 1141 info->columns_local = (double)matin->N; 1142 1143 PetscFunctionReturn(0); 1144 } 1145 1146 #undef __FUNCT__ 1147 #define __FUNCT__ "MatSetOption_MPIAIJ" 1148 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op) 1149 { 1150 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1151 PetscErrorCode ierr; 1152 1153 PetscFunctionBegin; 1154 switch (op) { 1155 case MAT_NO_NEW_NONZERO_LOCATIONS: 1156 case MAT_YES_NEW_NONZERO_LOCATIONS: 1157 case MAT_COLUMNS_UNSORTED: 1158 case MAT_COLUMNS_SORTED: 1159 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1160 case MAT_KEEP_ZEROED_ROWS: 1161 case MAT_NEW_NONZERO_LOCATION_ERR: 1162 case MAT_USE_INODES: 1163 case MAT_DO_NOT_USE_INODES: 1164 case MAT_IGNORE_ZERO_ENTRIES: 1165 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1166 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1167 break; 1168 case MAT_ROW_ORIENTED: 1169 a->roworiented = PETSC_TRUE; 1170 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1171 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1172 break; 1173 case MAT_ROWS_SORTED: 1174 case MAT_ROWS_UNSORTED: 1175 case MAT_YES_NEW_DIAGONALS: 1176 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1177 break; 1178 case MAT_COLUMN_ORIENTED: 1179 a->roworiented = PETSC_FALSE; 1180 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1181 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1182 break; 1183 case MAT_IGNORE_OFF_PROC_ENTRIES: 1184 a->donotstash = PETSC_TRUE; 1185 break; 1186 case MAT_NO_NEW_DIAGONALS: 1187 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1188 case MAT_SYMMETRIC: 1189 case MAT_STRUCTURALLY_SYMMETRIC: 1190 case MAT_HERMITIAN: 1191 case MAT_SYMMETRY_ETERNAL: 1192 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1193 break; 1194 case MAT_NOT_SYMMETRIC: 1195 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1196 case MAT_NOT_HERMITIAN: 1197 case MAT_NOT_SYMMETRY_ETERNAL: 1198 break; 1199 default: 1200 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1201 } 1202 PetscFunctionReturn(0); 1203 } 1204 1205 #undef __FUNCT__ 1206 #define __FUNCT__ "MatGetRow_MPIAIJ" 1207 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1208 { 1209 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1210 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1211 PetscErrorCode ierr; 1212 int i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1213 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1214 int *cmap,*idx_p; 1215 1216 PetscFunctionBegin; 1217 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1218 mat->getrowactive = PETSC_TRUE; 1219 1220 if 
(!mat->rowvalues && (idx || v)) { 1221 /* 1222 allocate enough space to hold information from the longest row. 1223 */ 1224 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1225 int max = 1,tmp; 1226 for (i=0; i<matin->m; i++) { 1227 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1228 if (max < tmp) { max = tmp; } 1229 } 1230 ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1231 mat->rowindices = (int*)(mat->rowvalues + max); 1232 } 1233 1234 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1235 lrow = row - rstart; 1236 1237 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1238 if (!v) {pvA = 0; pvB = 0;} 1239 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1240 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1241 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1242 nztot = nzA + nzB; 1243 1244 cmap = mat->garray; 1245 if (v || idx) { 1246 if (nztot) { 1247 /* Sort by increasing column numbers, assuming A and B already sorted */ 1248 int imark = -1; 1249 if (v) { 1250 *v = v_p = mat->rowvalues; 1251 for (i=0; i<nzB; i++) { 1252 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1253 else break; 1254 } 1255 imark = i; 1256 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1257 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1258 } 1259 if (idx) { 1260 *idx = idx_p = mat->rowindices; 1261 if (imark > -1) { 1262 for (i=0; i<imark; i++) { 1263 idx_p[i] = cmap[cworkB[i]]; 1264 } 1265 } else { 1266 for (i=0; i<nzB; i++) { 1267 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1268 else break; 1269 } 1270 imark = i; 1271 } 1272 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1273 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1274 } 1275 } else { 1276 if (idx) *idx = 0; 1277 if (v) *v = 0; 1278 } 1279 } 1280 *nz = nztot; 1281 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1282 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1283 PetscFunctionReturn(0); 1284 } 1285 1286 #undef __FUNCT__ 1287 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1288 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1289 { 1290 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1291 1292 PetscFunctionBegin; 1293 if (aij->getrowactive == PETSC_FALSE) { 1294 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1295 } 1296 aij->getrowactive = PETSC_FALSE; 1297 PetscFunctionReturn(0); 1298 } 1299 1300 #undef __FUNCT__ 1301 #define __FUNCT__ "MatNorm_MPIAIJ" 1302 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1303 { 1304 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1305 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1306 PetscErrorCode ierr; 1307 int i,j,cstart = aij->cstart; 1308 PetscReal sum = 0.0; 1309 PetscScalar *v; 1310 1311 PetscFunctionBegin; 1312 if (aij->size == 1) { 1313 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1314 } else { 1315 if (type == NORM_FROBENIUS) { 1316 v = amat->a; 1317 for (i=0; i<amat->nz; i++) { 1318 #if defined(PETSC_USE_COMPLEX) 1319 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1320 #else 1321 sum += (*v)*(*v); v++; 1322 #endif 1323 } 1324 v = bmat->a; 1325 for (i=0; i<bmat->nz; i++) { 1326 #if defined(PETSC_USE_COMPLEX) 1327 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1328 #else 1329 sum += (*v)*(*v); v++; 1330 #endif 1331 } 1332 ierr = 
MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1333 *norm = sqrt(*norm); 1334 } else if (type == NORM_1) { /* max column norm */ 1335 PetscReal *tmp,*tmp2; 1336 int *jj,*garray = aij->garray; 1337 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1338 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1339 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1340 *norm = 0.0; 1341 v = amat->a; jj = amat->j; 1342 for (j=0; j<amat->nz; j++) { 1343 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1344 } 1345 v = bmat->a; jj = bmat->j; 1346 for (j=0; j<bmat->nz; j++) { 1347 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1348 } 1349 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1350 for (j=0; j<mat->N; j++) { 1351 if (tmp2[j] > *norm) *norm = tmp2[j]; 1352 } 1353 ierr = PetscFree(tmp);CHKERRQ(ierr); 1354 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1355 } else if (type == NORM_INFINITY) { /* max row norm */ 1356 PetscReal ntemp = 0.0; 1357 for (j=0; j<aij->A->m; j++) { 1358 v = amat->a + amat->i[j]; 1359 sum = 0.0; 1360 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1361 sum += PetscAbsScalar(*v); v++; 1362 } 1363 v = bmat->a + bmat->i[j]; 1364 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1365 sum += PetscAbsScalar(*v); v++; 1366 } 1367 if (sum > ntemp) ntemp = sum; 1368 } 1369 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1370 } else { 1371 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1372 } 1373 } 1374 PetscFunctionReturn(0); 1375 } 1376 1377 #undef __FUNCT__ 1378 #define __FUNCT__ "MatTranspose_MPIAIJ" 1379 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1380 { 1381 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1382 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1383 PetscErrorCode ierr; 1384 int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1385 Mat B; 1386 PetscScalar *array; 1387 1388 PetscFunctionBegin; 1389 if (!matout && M != N) { 1390 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1391 } 1392 1393 ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr); 1394 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1395 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1396 1397 /* copy over the A part */ 1398 Aloc = (Mat_SeqAIJ*)a->A->data; 1399 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1400 row = a->rstart; 1401 for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;} 1402 for (i=0; i<m; i++) { 1403 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1404 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1405 } 1406 aj = Aloc->j; 1407 for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;} 1408 1409 /* copy over the B part */ 1410 Aloc = (Mat_SeqAIJ*)a->B->data; 1411 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1412 row = a->rstart; 1413 ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr); 1414 ct = cols; 1415 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1416 for (i=0; i<m; i++) { 1417 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1418 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1419 } 1420 ierr = PetscFree(ct);CHKERRQ(ierr); 1421 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1422 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1423 if (matout) { 1424 *matout = B; 1425 } else { 1426 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1427 } 1428 PetscFunctionReturn(0); 1429 } 1430 1431 #undef 
__FUNCT__ 1432 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1433 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1434 { 1435 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1436 Mat a = aij->A,b = aij->B; 1437 PetscErrorCode ierr; 1438 int s1,s2,s3; 1439 1440 PetscFunctionBegin; 1441 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1442 if (rr) { 1443 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1444 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1445 /* Overlap communication with computation. */ 1446 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1447 } 1448 if (ll) { 1449 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1450 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1451 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1452 } 1453 /* scale the diagonal block */ 1454 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1455 1456 if (rr) { 1457 /* Do a scatter end and then right scale the off-diagonal block */ 1458 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1459 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1460 } 1461 1462 PetscFunctionReturn(0); 1463 } 1464 1465 1466 #undef __FUNCT__ 1467 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1468 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A) 1469 { 1470 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1471 PetscErrorCode ierr; 1472 1473 PetscFunctionBegin; 1474 if (!a->rank) { 1475 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1476 } 1477 PetscFunctionReturn(0); 1478 } 1479 1480 #undef __FUNCT__ 1481 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1482 PetscErrorCode MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1483 { 1484 PetscFunctionBegin; 1485 *bs = 1; 1486 PetscFunctionReturn(0); 1487 } 1488 #undef __FUNCT__ 1489 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1490 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1491 { 1492 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1493 PetscErrorCode ierr; 1494 1495 PetscFunctionBegin; 1496 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1497 PetscFunctionReturn(0); 1498 } 1499 1500 #undef __FUNCT__ 1501 #define __FUNCT__ "MatEqual_MPIAIJ" 1502 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1503 { 1504 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1505 Mat a,b,c,d; 1506 PetscTruth flg; 1507 PetscErrorCode ierr; 1508 1509 PetscFunctionBegin; 1510 a = matA->A; b = matA->B; 1511 c = matB->A; d = matB->B; 1512 1513 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1514 if (flg == PETSC_TRUE) { 1515 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1516 } 1517 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1518 PetscFunctionReturn(0); 1519 } 1520 1521 #undef __FUNCT__ 1522 #define __FUNCT__ "MatCopy_MPIAIJ" 1523 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1524 { 1525 PetscErrorCode ierr; 1526 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1527 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1528 1529 PetscFunctionBegin; 1530 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1531 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1532 /* because of the column compression in the off-processor part of the matrix a->B, 1533 the number of columns in a->B and b->B may be different, hence we cannot call 1534 the MatCopy() directly on the two parts. 
If need be, we can provide a more 1535 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1536 then copying the submatrices */ 1537 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1538 } else { 1539 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1540 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1541 } 1542 PetscFunctionReturn(0); 1543 } 1544 1545 #undef __FUNCT__ 1546 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1547 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1548 { 1549 PetscErrorCode ierr; 1550 1551 PetscFunctionBegin; 1552 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1553 PetscFunctionReturn(0); 1554 } 1555 1556 #include "petscblaslapack.h" 1557 #undef __FUNCT__ 1558 #define __FUNCT__ "MatAXPY_MPIAIJ" 1559 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str) 1560 { 1561 PetscErrorCode ierr; 1562 int i; 1563 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1564 PetscBLASInt bnz,one=1; 1565 Mat_SeqAIJ *x,*y; 1566 1567 PetscFunctionBegin; 1568 if (str == SAME_NONZERO_PATTERN) { 1569 x = (Mat_SeqAIJ *)xx->A->data; 1570 y = (Mat_SeqAIJ *)yy->A->data; 1571 bnz = (PetscBLASInt)x->nz; 1572 BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1573 x = (Mat_SeqAIJ *)xx->B->data; 1574 y = (Mat_SeqAIJ *)yy->B->data; 1575 bnz = (PetscBLASInt)x->nz; 1576 BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1577 } else if (str == SUBSET_NONZERO_PATTERN) { 1578 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1579 1580 x = (Mat_SeqAIJ *)xx->B->data; 1581 y = (Mat_SeqAIJ *)yy->B->data; 1582 if (y->xtoy && y->XtoY != xx->B) { 1583 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1584 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1585 } 1586 if (!y->xtoy) { /* get xtoy */ 1587 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1588 y->XtoY = xx->B; 1589 } 1590 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1591 } else { 1592 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1593 } 1594 PetscFunctionReturn(0); 1595 } 1596 1597 /* -------------------------------------------------------------------*/ 1598 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1599 MatGetRow_MPIAIJ, 1600 MatRestoreRow_MPIAIJ, 1601 MatMult_MPIAIJ, 1602 /* 4*/ MatMultAdd_MPIAIJ, 1603 MatMultTranspose_MPIAIJ, 1604 MatMultTransposeAdd_MPIAIJ, 1605 0, 1606 0, 1607 0, 1608 /*10*/ 0, 1609 0, 1610 0, 1611 MatRelax_MPIAIJ, 1612 MatTranspose_MPIAIJ, 1613 /*15*/ MatGetInfo_MPIAIJ, 1614 MatEqual_MPIAIJ, 1615 MatGetDiagonal_MPIAIJ, 1616 MatDiagonalScale_MPIAIJ, 1617 MatNorm_MPIAIJ, 1618 /*20*/ MatAssemblyBegin_MPIAIJ, 1619 MatAssemblyEnd_MPIAIJ, 1620 0, 1621 MatSetOption_MPIAIJ, 1622 MatZeroEntries_MPIAIJ, 1623 /*25*/ MatZeroRows_MPIAIJ, 1624 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1625 MatLUFactorSymbolic_MPIAIJ_TFS, 1626 #else 1627 0, 1628 #endif 1629 0, 1630 0, 1631 0, 1632 /*30*/ MatSetUpPreallocation_MPIAIJ, 1633 0, 1634 0, 1635 0, 1636 0, 1637 /*35*/ MatDuplicate_MPIAIJ, 1638 0, 1639 0, 1640 0, 1641 0, 1642 /*40*/ MatAXPY_MPIAIJ, 1643 MatGetSubMatrices_MPIAIJ, 1644 MatIncreaseOverlap_MPIAIJ, 1645 MatGetValues_MPIAIJ, 1646 MatCopy_MPIAIJ, 1647 /*45*/ MatPrintHelp_MPIAIJ, 1648 MatScale_MPIAIJ, 1649 0, 1650 0, 1651 0, 1652 /*50*/ MatGetBlockSize_MPIAIJ, 1653 0, 1654 0, 1655 0, 1656 0, 1657 /*55*/ MatFDColoringCreate_MPIAIJ, 1658 0, 1659 MatSetUnfactored_MPIAIJ, 1660 0, 1661 0, 1662 /*60*/ MatGetSubMatrix_MPIAIJ, 1663 
MatDestroy_MPIAIJ, 1664 MatView_MPIAIJ, 1665 MatGetPetscMaps_Petsc, 1666 0, 1667 /*65*/ 0, 1668 0, 1669 0, 1670 0, 1671 0, 1672 /*70*/ 0, 1673 0, 1674 MatSetColoring_MPIAIJ, 1675 MatSetValuesAdic_MPIAIJ, 1676 MatSetValuesAdifor_MPIAIJ, 1677 /*75*/ 0, 1678 0, 1679 0, 1680 0, 1681 0, 1682 /*80*/ 0, 1683 0, 1684 0, 1685 0, 1686 /*84*/ MatLoad_MPIAIJ, 1687 0, 1688 0, 1689 0, 1690 0, 1691 0, 1692 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1693 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1694 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1695 MatPtAP_MPIAIJ_MPIAIJ, 1696 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1697 /*95*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 1698 0, 1699 0, 1700 0}; 1701 1702 /* ----------------------------------------------------------------------------------------*/ 1703 1704 EXTERN_C_BEGIN 1705 #undef __FUNCT__ 1706 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1707 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 1708 { 1709 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1710 PetscErrorCode ierr; 1711 1712 PetscFunctionBegin; 1713 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1714 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1715 PetscFunctionReturn(0); 1716 } 1717 EXTERN_C_END 1718 1719 EXTERN_C_BEGIN 1720 #undef __FUNCT__ 1721 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1722 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 1723 { 1724 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1725 PetscErrorCode ierr; 1726 1727 PetscFunctionBegin; 1728 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1729 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1730 PetscFunctionReturn(0); 1731 } 1732 EXTERN_C_END 1733 1734 #include "petscpc.h" 1735 EXTERN_C_BEGIN 1736 #undef __FUNCT__ 1737 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1738 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 1739 { 1740 Mat_MPIAIJ *b; 1741 PetscErrorCode ierr; 1742 int i; 1743 1744 PetscFunctionBegin; 1745 B->preallocated = PETSC_TRUE; 1746 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1747 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1748 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1749 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1750 if (d_nnz) { 1751 for (i=0; i<B->m; i++) { 1752 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 1753 } 1754 } 1755 if (o_nnz) { 1756 for (i=0; i<B->m; i++) { 1757 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 1758 } 1759 } 1760 b = (Mat_MPIAIJ*)B->data; 1761 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1762 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1763 1764 PetscFunctionReturn(0); 1765 } 1766 EXTERN_C_END 1767 1768 /*MC 1769 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 1770 1771 Options Database Keys: 1772 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 1773 1774 Level: beginner 1775 1776 .seealso: MatCreateMPIAIJ 1777 M*/ 1778 1779 EXTERN_C_BEGIN 1780 #undef __FUNCT__ 1781 #define __FUNCT__ "MatCreate_MPIAIJ" 1782 PetscErrorCode MatCreate_MPIAIJ(Mat B) 1783 { 1784 Mat_MPIAIJ *b; 1785 PetscErrorCode ierr; 1786 int i,size; 1787 1788 PetscFunctionBegin; 1789 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1790 1791 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1792 B->data = (void*)b; 1793 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1794 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1795 B->factor = 0; 1796 B->assembled = PETSC_FALSE; 1797 B->mapping = 0; 1798 1799 B->insertmode = NOT_SET_VALUES; 1800 b->size = size; 1801 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1802 1803 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1804 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1805 1806 /* the information in the maps duplicates the information computed below, eventually 1807 we should remove the duplicate information that is not contained in the maps */ 1808 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1809 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1810 1811 /* build local table of row and column ownerships */ 1812 ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1813 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1814 b->cowners = b->rowners + b->size + 2; 1815 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1816 b->rowners[0] = 0; 1817 for (i=2; i<=b->size; i++) { 1818 b->rowners[i] += b->rowners[i-1]; 1819 } 1820 b->rstart = b->rowners[b->rank]; 1821 b->rend = b->rowners[b->rank+1]; 1822 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1823 b->cowners[0] = 0; 1824 for (i=2; i<=b->size; i++) { 1825 b->cowners[i] += b->cowners[i-1]; 1826 } 1827 b->cstart = b->cowners[b->rank]; 1828 b->cend = b->cowners[b->rank+1]; 1829 1830 /* build cache for off array entries formed */ 1831 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1832 b->donotstash = PETSC_FALSE; 1833 b->colmap = 0; 1834 b->garray = 0; 1835 b->roworiented = PETSC_TRUE; 1836 1837 /* stuff used for matrix vector multiply */ 1838 b->lvec = PETSC_NULL; 1839 b->Mvctx = PETSC_NULL; 1840 1841 /* stuff for MatGetRow() */ 1842 b->rowindices = 0; 1843 b->rowvalues = 0; 1844 b->getrowactive = PETSC_FALSE; 1845 1846 /* Explicitly create 2 MATSEQAIJ matrices. 
*/ 1847 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 1848 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 1849 PetscLogObjectParent(B,b->A); 1850 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 1851 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 1852 PetscLogObjectParent(B,b->B); 1853 1854 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1855 "MatStoreValues_MPIAIJ", 1856 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1857 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1858 "MatRetrieveValues_MPIAIJ", 1859 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1860 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1861 "MatGetDiagonalBlock_MPIAIJ", 1862 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1863 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 1864 "MatIsTranspose_MPIAIJ", 1865 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 1866 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 1867 "MatMPIAIJSetPreallocation_MPIAIJ", 1868 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 1869 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 1870 "MatDiagonalScaleLocal_MPIAIJ", 1871 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 1872 PetscFunctionReturn(0); 1873 } 1874 EXTERN_C_END 1875 1876 /*MC 1877 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 1878 1879 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 1880 and MATMPIAIJ otherwise. As a result, for single process communicators, 1881 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 1882 for communicators controlling multiple processes. It is recommended that you call both of 1883 the above preallocation routines for simplicity. 1884 1885 Options Database Keys: 1886 . 
-mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 1887 1888 Level: beginner 1889 1890 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ 1891 M*/ 1892 1893 EXTERN_C_BEGIN 1894 #undef __FUNCT__ 1895 #define __FUNCT__ "MatCreate_AIJ" 1896 PetscErrorCode MatCreate_AIJ(Mat A) 1897 { 1898 PetscErrorCode ierr; 1899 int size; 1900 1901 PetscFunctionBegin; 1902 ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr); 1903 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1904 if (size == 1) { 1905 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 1906 } else { 1907 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1908 } 1909 PetscFunctionReturn(0); 1910 } 1911 EXTERN_C_END 1912 1913 #undef __FUNCT__ 1914 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1915 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1916 { 1917 Mat mat; 1918 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1919 PetscErrorCode ierr; 1920 1921 PetscFunctionBegin; 1922 *newmat = 0; 1923 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1924 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1925 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1926 a = (Mat_MPIAIJ*)mat->data; 1927 1928 mat->factor = matin->factor; 1929 mat->assembled = PETSC_TRUE; 1930 mat->insertmode = NOT_SET_VALUES; 1931 mat->preallocated = PETSC_TRUE; 1932 1933 a->rstart = oldmat->rstart; 1934 a->rend = oldmat->rend; 1935 a->cstart = oldmat->cstart; 1936 a->cend = oldmat->cend; 1937 a->size = oldmat->size; 1938 a->rank = oldmat->rank; 1939 a->donotstash = oldmat->donotstash; 1940 a->roworiented = oldmat->roworiented; 1941 a->rowindices = 0; 1942 a->rowvalues = 0; 1943 a->getrowactive = PETSC_FALSE; 1944 1945 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1946 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1947 if (oldmat->colmap) { 1948 #if defined (PETSC_USE_CTABLE) 1949 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1950 #else 1951 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1952 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1953 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1954 #endif 1955 } else a->colmap = 0; 1956 if (oldmat->garray) { 1957 int len; 1958 len = oldmat->B->n; 1959 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1960 PetscLogObjectMemory(mat,len*sizeof(int)); 1961 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1962 } else a->garray = 0; 1963 1964 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1965 PetscLogObjectParent(mat,a->lvec); 1966 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1967 PetscLogObjectParent(mat,a->Mvctx); 1968 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1969 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1970 PetscLogObjectParent(mat,a->A); 1971 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1972 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1973 PetscLogObjectParent(mat,a->B); 1974 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1975 *newmat = mat; 1976 PetscFunctionReturn(0); 1977 } 1978 1979 #include "petscsys.h" 1980 1981 #undef __FUNCT__ 1982 #define __FUNCT__ "MatLoad_MPIAIJ" 1983 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 1984 { 1985 Mat A; 1986 PetscScalar *vals,*svals; 1987 
MPI_Comm comm = ((PetscObject)viewer)->comm; 1988 MPI_Status status; 1989 PetscErrorCode ierr; 1990 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1991 int i,nz,j,rstart,rend,fd; 1992 int header[4],*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1993 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1994 int cend,cstart,n; 1995 1996 PetscFunctionBegin; 1997 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1998 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1999 if (!rank) { 2000 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2001 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2002 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2003 if (header[3] < 0) { 2004 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 2005 } 2006 } 2007 2008 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2009 M = header[1]; N = header[2]; 2010 /* determine ownership of all rows */ 2011 m = M/size + ((M % size) > rank); 2012 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 2013 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2014 rowners[0] = 0; 2015 for (i=2; i<=size; i++) { 2016 rowners[i] += rowners[i-1]; 2017 } 2018 rstart = rowners[rank]; 2019 rend = rowners[rank+1]; 2020 2021 /* distribute row lengths to all processors */ 2022 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr); 2023 offlens = ourlens + (rend-rstart); 2024 if (!rank) { 2025 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 2026 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2027 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 2028 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 2029 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 2030 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2031 } else { 2032 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 2033 } 2034 2035 if (!rank) { 2036 /* calculate the number of nonzeros on each processor */ 2037 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 2038 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2039 for (i=0; i<size; i++) { 2040 for (j=rowners[i]; j< rowners[i+1]; j++) { 2041 procsnz[i] += rowlengths[j]; 2042 } 2043 } 2044 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2045 2046 /* determine max buffer needed and allocate it */ 2047 maxnz = 0; 2048 for (i=0; i<size; i++) { 2049 maxnz = PetscMax(maxnz,procsnz[i]); 2050 } 2051 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 2052 2053 /* read in my part of the matrix column indices */ 2054 nz = procsnz[0]; 2055 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 2056 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2057 2058 /* read in every one elses and ship off */ 2059 for (i=1; i<size; i++) { 2060 nz = procsnz[i]; 2061 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2062 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2063 } 2064 ierr = PetscFree(cols);CHKERRQ(ierr); 2065 } else { 2066 /* determine buffer space needed for message */ 2067 nz = 0; 2068 for (i=0; i<m; i++) { 2069 nz += ourlens[i]; 2070 } 2071 ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr); 2072 2073 /* receive message of column indices*/ 2074 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2075 ierr = 
MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2076 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2077 } 2078 2079 /* determine column ownership if matrix is not square */ 2080 if (N != M) { 2081 n = N/size + ((N % size) > rank); 2082 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2083 cstart = cend - n; 2084 } else { 2085 cstart = rstart; 2086 cend = rend; 2087 n = cend - cstart; 2088 } 2089 2090 /* loop over local rows, determining number of off diagonal entries */ 2091 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 2092 jj = 0; 2093 for (i=0; i<m; i++) { 2094 for (j=0; j<ourlens[i]; j++) { 2095 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2096 jj++; 2097 } 2098 } 2099 2100 /* create our matrix */ 2101 for (i=0; i<m; i++) { 2102 ourlens[i] -= offlens[i]; 2103 } 2104 ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr); 2105 ierr = MatSetType(A,type);CHKERRQ(ierr); 2106 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2107 2108 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2109 for (i=0; i<m; i++) { 2110 ourlens[i] += offlens[i]; 2111 } 2112 2113 if (!rank) { 2114 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2115 2116 /* read in my part of the matrix numerical values */ 2117 nz = procsnz[0]; 2118 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2119 2120 /* insert into matrix */ 2121 jj = rstart; 2122 smycols = mycols; 2123 svals = vals; 2124 for (i=0; i<m; i++) { 2125 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2126 smycols += ourlens[i]; 2127 svals += ourlens[i]; 2128 jj++; 2129 } 2130 2131 /* read in other processors and ship out */ 2132 for (i=1; i<size; i++) { 2133 nz = procsnz[i]; 2134 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2135 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2136 } 2137 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2138 } else { 2139 /* receive numeric values */ 2140 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2141 2142 /* receive message of values*/ 2143 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2144 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2145 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2146 2147 /* insert into matrix */ 2148 jj = rstart; 2149 smycols = mycols; 2150 svals = vals; 2151 for (i=0; i<m; i++) { 2152 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2153 smycols += ourlens[i]; 2154 svals += ourlens[i]; 2155 jj++; 2156 } 2157 } 2158 ierr = PetscFree(ourlens);CHKERRQ(ierr); 2159 ierr = PetscFree(vals);CHKERRQ(ierr); 2160 ierr = PetscFree(mycols);CHKERRQ(ierr); 2161 ierr = PetscFree(rowners);CHKERRQ(ierr); 2162 2163 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2164 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2165 *newmat = A; 2166 PetscFunctionReturn(0); 2167 } 2168 2169 #undef __FUNCT__ 2170 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2171 /* 2172 Not great since it makes two copies of the submatrix, first an SeqAIJ 2173 in local and then by concatenating the local matrices the end result. 
2174 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2175 */ 2176 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 2177 { 2178 PetscErrorCode ierr; 2179 PetscMPIInt rank,size; 2180 int i,m,n,rstart,row,rend,nz,*cwork,j; 2181 int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2182 Mat *local,M,Mreuse; 2183 PetscScalar *vwork,*aa; 2184 MPI_Comm comm = mat->comm; 2185 Mat_SeqAIJ *aij; 2186 2187 2188 PetscFunctionBegin; 2189 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2190 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2191 2192 if (call == MAT_REUSE_MATRIX) { 2193 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2194 if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse"); 2195 local = &Mreuse; 2196 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2197 } else { 2198 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2199 Mreuse = *local; 2200 ierr = PetscFree(local);CHKERRQ(ierr); 2201 } 2202 2203 /* 2204 m - number of local rows 2205 n - number of columns (same on all processors) 2206 rstart - first row in new global matrix generated 2207 */ 2208 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2209 if (call == MAT_INITIAL_MATRIX) { 2210 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2211 ii = aij->i; 2212 jj = aij->j; 2213 2214 /* 2215 Determine the number of non-zeros in the diagonal and off-diagonal 2216 portions of the matrix in order to do correct preallocation 2217 */ 2218 2219 /* first get start and end of "diagonal" columns */ 2220 if (csize == PETSC_DECIDE) { 2221 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2222 if (mglobal == n) { /* square matrix */ 2223 nlocal = m; 2224 } else { 2225 nlocal = n/size + ((n % size) > rank); 2226 } 2227 } else { 2228 nlocal = csize; 2229 } 2230 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2231 rstart = rend - nlocal; 2232 if (rank == size - 1 && rend != n) { 2233 SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n); 2234 } 2235 2236 /* next, compute all the lengths */ 2237 ierr = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2238 olens = dlens + m; 2239 for (i=0; i<m; i++) { 2240 jend = ii[i+1] - ii[i]; 2241 olen = 0; 2242 dlen = 0; 2243 for (j=0; j<jend; j++) { 2244 if (*jj < rstart || *jj >= rend) olen++; 2245 else dlen++; 2246 jj++; 2247 } 2248 olens[i] = olen; 2249 dlens[i] = dlen; 2250 } 2251 ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr); 2252 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2253 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2254 ierr = PetscFree(dlens);CHKERRQ(ierr); 2255 } else { 2256 int ml,nl; 2257 2258 M = *newmat; 2259 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2260 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2261 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2262 /* 2263 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2264 rather than the slower MatSetValues(). 
2265 */ 2266 M->was_assembled = PETSC_TRUE; 2267 M->assembled = PETSC_FALSE; 2268 } 2269 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2270 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2271 ii = aij->i; 2272 jj = aij->j; 2273 aa = aij->a; 2274 for (i=0; i<m; i++) { 2275 row = rstart + i; 2276 nz = ii[i+1] - ii[i]; 2277 cwork = jj; jj += nz; 2278 vwork = aa; aa += nz; 2279 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2280 } 2281 2282 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2283 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2284 *newmat = M; 2285 2286 /* save submatrix used in processor for next request */ 2287 if (call == MAT_INITIAL_MATRIX) { 2288 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2289 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2290 } 2291 2292 PetscFunctionReturn(0); 2293 } 2294 2295 #undef __FUNCT__ 2296 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2297 /*@C 2298 MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format 2299 (the default parallel PETSc format). For good matrix assembly performance 2300 the user should preallocate the matrix storage by setting the parameters 2301 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2302 performance can be increased by more than a factor of 50. 2303 2304 Collective on MPI_Comm 2305 2306 Input Parameters: 2307 + A - the matrix 2308 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2309 (same value is used for all local rows) 2310 . d_nnz - array containing the number of nonzeros in the various rows of the 2311 DIAGONAL portion of the local submatrix (possibly different for each row) 2312 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2313 The size of this array is equal to the number of local rows, i.e 'm'. 2314 You must leave room for the diagonal entry even if it is zero. 2315 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2316 submatrix (same value is used for all local rows). 2317 - o_nnz - array containing the number of nonzeros in the various rows of the 2318 OFF-DIAGONAL portion of the local submatrix (possibly different for 2319 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2320 structure. The size of this array is equal to the number 2321 of local rows, i.e 'm'. 2322 2323 The AIJ format (also called the Yale sparse matrix format or 2324 compressed row storage), is fully compatible with standard Fortran 77 2325 storage. That is, the stored row and column indices can begin at 2326 either one (as in Fortran) or zero. See the users manual for details. 2327 2328 The user MUST specify either the local or global matrix dimensions 2329 (possibly both). 2330 2331 The parallel matrix is partitioned such that the first m0 rows belong to 2332 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2333 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2334 2335 The DIAGONAL portion of the local submatrix of a processor can be defined 2336 as the submatrix which is obtained by extraction the part corresponding 2337 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2338 first row that belongs to the processor, and r2 is the last row belonging 2339 to the this processor. This is a square mxm matrix. The remaining portion 2340 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 
   If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 nonzero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0, proc1, proc2 are 3, 3, 2 respectively.
   The 'n' parameters for proc0, proc1, proc2 are 3, 3, 2 respectively.
   The 'M','N' parameters are 8, 8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0, proc1, proc2 are the
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0, proc1, proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 stores [E] as one SeqAIJ matrix
   and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz, o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz, o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e., 34, and
   hence the preallocation is perfect.
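   As a concrete sketch of the calling sequence (illustrative only; the layout and
   per-row counts are those of proc0 in the 8x8 example above, and error handling
   follows the usual PETSc conventions), proc0 could preallocate its 3 local rows with:

.vb
     Mat A;
     int d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};   /* per-row counts for proc0 */

     ierr = MatCreate(PETSC_COMM_WORLD,3,3,8,8,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
     /* ... MatSetValues() calls, then MatAssemblyBegin()/MatAssemblyEnd() ... */
.ve
   (proc1 and proc2 pass their own local sizes and their own d_nnz/o_nnz arrays.)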
2423 2424 Level: intermediate 2425 2426 .keywords: matrix, aij, compressed row, sparse, parallel 2427 2428 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2429 @*/ 2430 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 2431 { 2432 PetscErrorCode ierr,(*f)(Mat,int,const int[],int,const int[]); 2433 2434 PetscFunctionBegin; 2435 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2436 if (f) { 2437 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2438 } 2439 PetscFunctionReturn(0); 2440 } 2441 2442 #undef __FUNCT__ 2443 #define __FUNCT__ "MatCreateMPIAIJ" 2444 /*@C 2445 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2446 (the default parallel PETSc format). For good matrix assembly performance 2447 the user should preallocate the matrix storage by setting the parameters 2448 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2449 performance can be increased by more than a factor of 50. 2450 2451 Collective on MPI_Comm 2452 2453 Input Parameters: 2454 + comm - MPI communicator 2455 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2456 This value should be the same as the local size used in creating the 2457 y vector for the matrix-vector product y = Ax. 2458 . n - This value should be the same as the local size used in creating the 2459 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2460 calculated if N is given) For square matrices n is almost always m. 2461 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2462 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2463 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2464 (same value is used for all local rows) 2465 . d_nnz - array containing the number of nonzeros in the various rows of the 2466 DIAGONAL portion of the local submatrix (possibly different for each row) 2467 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2468 The size of this array is equal to the number of local rows, i.e 'm'. 2469 You must leave room for the diagonal entry even if it is zero. 2470 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2471 submatrix (same value is used for all local rows). 2472 - o_nnz - array containing the number of nonzeros in the various rows of the 2473 OFF-DIAGONAL portion of the local submatrix (possibly different for 2474 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2475 structure. The size of this array is equal to the number 2476 of local rows, i.e 'm'. 2477 2478 Output Parameter: 2479 . A - the matrix 2480 2481 Notes: 2482 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2483 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2484 storage requirements for this matrix. 2485 2486 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2487 processor than it must be used on all processors that share the object for 2488 that argument. 2489 2490 The AIJ format (also called the Yale sparse matrix format or 2491 compressed row storage), is fully compatible with standard Fortran 77 2492 storage. That is, the stored row and column indices can begin at 2493 either one (as in Fortran) or zero. See the users manual for details. 
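   For instance, a minimal sketch for proc0 of the 8x8 example discussed below
   (the per-row counts are illustrative; the other processes pass their own local
   sizes and nonzero counts) is:

.vb
     Mat A;
     int d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};

     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve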
   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor is the
   submatrix obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism:
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 nonzero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0, proc1, proc2 are 3, 3, 2 respectively.
   The 'n' parameters for proc0, proc1, proc2 are 3, 3, 2 respectively.
   The 'M','N' parameters are 8, 8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0, proc1, proc2 are the
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0, proc1, proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 stores [E] as one SeqAIJ matrix
   and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2573 One way to choose d_nz and o_nz is to use the max nonzerors per local 2574 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2575 In this case, the values of d_nz,o_nz are: 2576 .vb 2577 proc0 : dnz = 2, o_nz = 2 2578 proc1 : dnz = 3, o_nz = 2 2579 proc2 : dnz = 1, o_nz = 4 2580 .ve 2581 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2582 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2583 for proc3. i.e we are using 12+15+10=37 storage locations to store 2584 34 values. 2585 2586 When d_nnz, o_nnz parameters are specified, the storage is specified 2587 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2588 In the above case the values for d_nnz,o_nnz are: 2589 .vb 2590 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2591 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2592 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2593 .ve 2594 Here the space allocated is sum of all the above values i.e 34, and 2595 hence pre-allocation is perfect. 2596 2597 Level: intermediate 2598 2599 .keywords: matrix, aij, compressed row, sparse, parallel 2600 2601 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2602 @*/ 2603 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A) 2604 { 2605 PetscErrorCode ierr; 2606 int size; 2607 2608 PetscFunctionBegin; 2609 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2610 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2611 if (size > 1) { 2612 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2613 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2614 } else { 2615 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2616 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2617 } 2618 PetscFunctionReturn(0); 2619 } 2620 2621 #undef __FUNCT__ 2622 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2623 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[]) 2624 { 2625 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2626 PetscFunctionBegin; 2627 *Ad = a->A; 2628 *Ao = a->B; 2629 *colmap = a->garray; 2630 PetscFunctionReturn(0); 2631 } 2632 2633 #undef __FUNCT__ 2634 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2635 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2636 { 2637 PetscErrorCode ierr; 2638 int i; 2639 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2640 2641 PetscFunctionBegin; 2642 if (coloring->ctype == IS_COLORING_LOCAL) { 2643 ISColoringValue *allcolors,*colors; 2644 ISColoring ocoloring; 2645 2646 /* set coloring for diagonal portion */ 2647 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2648 2649 /* set coloring for off-diagonal portion */ 2650 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2651 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2652 for (i=0; i<a->B->n; i++) { 2653 colors[i] = allcolors[a->garray[i]]; 2654 } 2655 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2656 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2657 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2658 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2659 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2660 ISColoringValue *colors; 2661 int *larray; 2662 ISColoring ocoloring; 2663 2664 /* set coloring for diagonal portion */ 2665 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2666 for (i=0; i<a->A->n; i++) { 2667 larray[i] = i + 
a->cstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge"
/*@C
   MatMerge - Creates a single large parallel PETSc matrix by concatenating the sequential
   matrices from each processor

   Collective on MPI_Comm

   Input Parameters:
+  comm - the communicator the parallel matrix will live on
.  inmat - the input sequential matrices
.  n - number of local columns (or PETSC_DECIDE)
-  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.  outmat - the parallel matrix generated

   Level: advanced

   Notes: The number of columns of the matrix in EACH processor MUST be the same.
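   A sketch of the intended calling sequence (the names Aloc and C are illustrative;
   each process is assumed to have assembled a SeqAIJ matrix holding its local rows,
   all with the same global number of columns):

.vb
     Mat Aloc,C;
     /* ... build and assemble the local m x N SeqAIJ matrix Aloc ... */
     ierr = MatMerge(PETSC_COMM_WORLD,Aloc,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     /* Aloc is destroyed inside MatMerge(); only the parallel matrix C remains */
.ve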
2743 2744 @*/ 2745 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2746 { 2747 PetscErrorCode ierr; 2748 int m,N,i,rstart,nnz,I,*dnz,*onz; 2749 const int *indx; 2750 const PetscScalar *values; 2751 PetscMap columnmap,rowmap; 2752 2753 PetscFunctionBegin; 2754 2755 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2756 if (scall == MAT_INITIAL_MATRIX){ 2757 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2758 if (n == PETSC_DECIDE){ 2759 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2760 ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr); 2761 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2762 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2763 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2764 } 2765 2766 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2767 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2768 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2769 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2770 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2771 2772 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2773 for (i=0;i<m;i++) { 2774 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2775 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2776 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2777 } 2778 /* This routine will ONLY return MPIAIJ type matrix */ 2779 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr); 2780 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2781 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2782 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2783 2784 } else if (scall == MAT_REUSE_MATRIX){ 2785 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2786 } else { 2787 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall); 2788 } 2789 2790 for (i=0;i<m;i++) { 2791 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2792 I = i + rstart; 2793 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2794 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2795 } 2796 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2797 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2798 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2799 2800 PetscFunctionReturn(0); 2801 } 2802 2803 #undef __FUNCT__ 2804 #define __FUNCT__ "MatFileSplit" 2805 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2806 { 2807 PetscErrorCode ierr; 2808 PetscMPIInt rank; 2809 int m,N,i,rstart,nnz; 2810 size_t len; 2811 const int *indx; 2812 PetscViewer out; 2813 char *name; 2814 Mat B; 2815 const PetscScalar *values; 2816 2817 PetscFunctionBegin; 2818 2819 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2820 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2821 /* Should this be the type of the diagonal block of A? 
*/ 2822 ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr); 2823 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2824 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 2825 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2826 for (i=0;i<m;i++) { 2827 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2828 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2829 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2830 } 2831 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2832 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2833 2834 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2835 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2836 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2837 sprintf(name,"%s.%d",outfile,rank); 2838 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr); 2839 ierr = PetscFree(name); 2840 ierr = MatView(B,out);CHKERRQ(ierr); 2841 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2842 ierr = MatDestroy(B);CHKERRQ(ierr); 2843 PetscFunctionReturn(0); 2844 } 2845 2846 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat); 2847 #undef __FUNCT__ 2848 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 2849 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 2850 { 2851 PetscErrorCode ierr; 2852 Mat_Merge_SeqsToMPI *merge=(Mat_Merge_SeqsToMPI*)A->spptr; 2853 2854 PetscFunctionBegin; 2855 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 2856 ierr = PetscFree(merge->abuf_s);CHKERRQ(ierr); 2857 ierr = PetscFree(merge->len_sra);CHKERRQ(ierr); 2858 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 2859 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 2860 ierr = PetscFree(merge->ijbuf_r);CHKERRQ(ierr); 2861 ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr); 2862 ierr = MatDestroy(merge->C_seq);CHKERRQ(ierr); 2863 ierr = PetscFree(merge);CHKERRQ(ierr); 2864 2865 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 2866 PetscFunctionReturn(0); 2867 } 2868 2869 #include "src/mat/utils/freespace.h" 2870 #include "petscbt.h" 2871 #undef __FUNCT__ 2872 #define __FUNCT__ "MatMerge_SeqsToMPI" 2873 /*@C 2874 MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential 2875 matrices from each processor 2876 2877 Collective on MPI_Comm 2878 2879 Input Parameters: 2880 + comm - the communicators the parallel matrix will live on 2881 . seqmat - the input sequential matrices 2882 . m - number of local rows (or PETSC_DECIDE) 2883 . n - number of local columns (or PETSC_DECIDE) 2884 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2885 2886 Output Parameter: 2887 . mpimat - the parallel matrix generated 2888 2889 Level: advanced 2890 2891 Notes: The dimensions of the sequential matrix in each processor 2892 MUST be the same. 
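   A sketch of the intended use (the names A_seq and C are illustrative; every
   process contributes an M x N SeqAIJ matrix and the entries are summed into the
   resulting MPIAIJ matrix):

.vb
     Mat A_seq,C;
     /* ... each process assembles its own M x N SeqAIJ matrix A_seq ... */
     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,A_seq,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     /* A_seq is kept with C (for MAT_REUSE_MATRIX) and is freed when C is destroyed */
.ve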
2893 2894 @*/ 2895 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 2896 { 2897 PetscErrorCode ierr; 2898 Mat B_seq,B_mpi; 2899 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2900 PetscMPIInt size,rank; 2901 int M=seqmat->m,N=seqmat->n,i,j,*owners,*ai=a->i,*aj=a->j; 2902 int tag,taga,len,len_a = 0,*len_s,*len_sa,proc,*len_r,*len_ra; 2903 int **ijbuf_r,*ijbuf_s,*nnz_ptr,k,anzi,*bj_i,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0,nextaj; 2904 MPI_Request *s_waits,*r_waits,*s_waitsa,*r_waitsa; 2905 MPI_Status *status; 2906 MatScalar *ba,*aa=a->a,**abuf_r,*abuf_i,*ba_i; 2907 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2908 PetscBT lnkbt; 2909 Mat_Merge_SeqsToMPI *merge; 2910 2911 PetscFunctionBegin; 2912 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2913 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2914 /* ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] nnz(C): %d\n",rank,ai[M]); */ 2915 if (scall == MAT_INITIAL_MATRIX){ 2916 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 2917 } else if (scall == MAT_REUSE_MATRIX){ 2918 B_mpi = *mpimat; 2919 merge = (Mat_Merge_SeqsToMPI*)B_mpi->spptr; 2920 bi = merge->bi; 2921 bj = merge->bj; 2922 ijbuf_r = merge->ijbuf_r; 2923 } else { 2924 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall); 2925 } 2926 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 2927 2928 /* determine the number of messages to send, their lengths */ 2929 /*---------------------------------------------------------*/ 2930 if (scall == MAT_INITIAL_MATRIX){ 2931 ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr); 2932 if (m == PETSC_DECIDE) { 2933 ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr); 2934 } else { 2935 ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 2936 } 2937 ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr); 2938 ierr = PetscMalloc(size*sizeof(int),&len_s);CHKERRQ(ierr); 2939 ierr = PetscMalloc(2*size*sizeof(int),&merge->len_sra);CHKERRQ(ierr); 2940 } 2941 if (m == PETSC_DECIDE) ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); 2942 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 2943 2944 len_sa = merge->len_sra; 2945 len_ra = merge->len_sra + size; 2946 2947 if (scall == MAT_INITIAL_MATRIX){ 2948 merge->nsend = 0; 2949 len = len_a = 0; 2950 for (proc=0; proc<size; proc++){ 2951 len_s[proc] = len_sa[proc] = 0; 2952 if (proc == rank) continue; 2953 for (i=owners[proc]; i<owners[proc+1]; i++){ /* rows sent to [proc] */ 2954 len_sa[proc] += ai[i+1] - ai[i]; /* num of cols sent to [proc] */ 2955 } 2956 if (len_sa[proc]) { 2957 merge->nsend++; 2958 len_s[proc] = len_sa[proc] + owners[proc+1] - owners[proc]; 2959 if (len < len_s[proc]) len = len_s[proc]; 2960 if (len_a < len_sa[proc]) len_a = len_sa[proc]; 2961 } 2962 } 2963 2964 /* determine the number and length of messages to receive */ 2965 /*--------------------------------------------------------*/ 2966 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 2967 ierr = PetscGatherMessageLengths(comm,merge->nsend,merge->nrecv,len_s,&merge->id_r,&len_r);CHKERRQ(ierr); 2968 /* 2969 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] nsend: %d, nrecv: %d\n",rank,merge->nsend,merge->nrecv); 2970 for (i=0; i<merge->nrecv; i++){ 2971 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] expects recv len_r=%d from [%d]\n",rank,len_r[i],merge->id_r[i]); 2972 } 2973 */ 2974 2975 /* post the Irecvs corresponding to these messages */ 2976 
/*-------------------------------------------------*/ 2977 /* get new tag to keep the communication clean */ 2978 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tag);CHKERRQ(ierr); 2979 ierr = PetscPostIrecvInt(comm,tag,merge->nrecv,merge->id_r,len_r,&ijbuf_r,&r_waits);CHKERRQ(ierr); 2980 2981 /* post the sends */ 2982 /*----------------*/ 2983 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 2984 ierr = PetscMalloc((len+1)*sizeof(int),&ijbuf_s);CHKERRQ(ierr); 2985 k = 0; 2986 for (proc=0; proc<size; proc++){ 2987 if (!len_s[proc]) continue; 2988 /* form outgoing ij data to be sent to [proc] */ 2989 nnz_ptr = ijbuf_s + owners[proc+1] - owners[proc]; 2990 for (i=owners[proc]; i<owners[proc+1]; i++){ /* rows sent to [proc] */ 2991 anzi = ai[i+1] - ai[i]; 2992 if (i==owners[proc]){ 2993 ijbuf_s[i-owners[proc]] = anzi; 2994 } else { 2995 ijbuf_s[i-owners[proc]] = ijbuf_s[i-owners[proc]-1]+ anzi; 2996 } 2997 for (j=0; j <anzi; j++){ 2998 *nnz_ptr = *(aj+ai[i]+j); nnz_ptr++; 2999 } 3000 } 3001 ierr = MPI_Isend(ijbuf_s,len_s[proc],MPI_INT,proc,tag,comm,s_waits+k);CHKERRQ(ierr); 3002 k++; 3003 } 3004 3005 /* receives and sends of ij-structure are complete, deallocate memories */ 3006 /*----------------------------------------------------------------------*/ 3007 ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr); 3008 ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr); 3009 3010 ierr = PetscFree(ijbuf_s);CHKERRQ(ierr); 3011 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3012 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3013 } 3014 3015 /* send and recv matrix values */ 3016 /*-----------------------------*/ 3017 if (scall == MAT_INITIAL_MATRIX){ 3018 ierr = PetscMalloc((len_a+1)*sizeof(MatScalar),&merge->abuf_s);CHKERRQ(ierr); 3019 for (i=0; i<merge->nrecv; i++) len_ra[i] = len_r[i]-m; /* length of seqmat->a to be received from a proc */ 3020 } 3021 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 3022 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,len_ra,&abuf_r,&r_waitsa);CHKERRQ(ierr); 3023 3024 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waitsa);CHKERRQ(ierr); 3025 k = 0; 3026 for (proc=0; proc<size; proc++){ 3027 if (!len_sa[proc]) continue; 3028 /* form outgoing mat value data to be sent to [proc] */ 3029 abuf_i = merge->abuf_s; 3030 for (i=owners[proc]; i<owners[proc+1]; i++){ /* rows sent to [proc] */ 3031 anzi = ai[i+1] - ai[i]; 3032 for (j=0; j <anzi; j++){ 3033 *abuf_i = *(aa+ai[i]+j); abuf_i++; 3034 } 3035 } 3036 ierr = MPI_Isend(merge->abuf_s,len_sa[proc],MPIU_MATSCALAR,proc,taga,comm,s_waitsa+k);CHKERRQ(ierr); 3037 k++; 3038 } 3039 3040 ierr = MPI_Waitall(merge->nrecv,r_waitsa,status);CHKERRQ(ierr); 3041 ierr = MPI_Waitall(merge->nsend,s_waitsa,status);CHKERRQ(ierr); 3042 ierr = PetscFree(status);CHKERRQ(ierr); 3043 3044 ierr = PetscFree(s_waitsa);CHKERRQ(ierr); 3045 ierr = PetscFree(r_waitsa);CHKERRQ(ierr); 3046 3047 /* create seq matrix B_seq in each processor */ 3048 /*-------------------------------------------*/ 3049 if (scall == MAT_INITIAL_MATRIX){ 3050 ierr = PetscFree(len_s);CHKERRQ(ierr); 3051 /* allocate bi array and free space for accumulating nonzero column info */ 3052 ierr = PetscMalloc((m+1)*sizeof(int),&bi);CHKERRQ(ierr); 3053 bi[0] = 0; 3054 3055 /* create and initialize a linked list */ 3056 nlnk = N+1; 3057 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3058 3059 /* initial FreeSpace size is (nproc)*(num of local nnz(seqmat)) */ 3060 len = 0; 
    for (i=owners[rank]; i<owners[rank+1]; i++) len += ai[i+1] - ai[i];
    ierr = GetMoreSpace((int)(size*len+1),&free_space);CHKERRQ(ierr);
    current_space = free_space;

    /* determine symbolic info for each row of B_seq */
    for (i=0; i<m; i++) {
      bnzi = 0;
      /* add local non-zero cols of this proc's seqmat into lnk */
      arow = owners[rank] + i;
      anzi = ai[arow+1] - ai[arow];
      aj   = a->j + ai[arow];
      ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
      bnzi += nlnk;
      /* add received col data into lnk */
      for (k=0; k<merge->nrecv; k++){ /* k-th received message */
        /* i-th row */
        if (i == 0){
          anzi = *(ijbuf_r[k]+i);
          aj   = ijbuf_r[k] + m;
        } else {
          anzi = *(ijbuf_r[k]+i) - *(ijbuf_r[k]+i-1);
          aj   = ijbuf_r[k]+m + *(ijbuf_r[k]+i-1);
        }
        ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
      }

      /* if free space is not available, make more free space */
      if (current_space->local_remaining < bnzi) {
        ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
        nspacedouble++;
      }
      /* copy data into free space, then initialize lnk */
      ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
      current_space->array           += bnzi;
      current_space->local_used      += bnzi;
      current_space->local_remaining -= bnzi;

      bi[i+1] = bi[i] + bnzi;
    }
    ierr = PetscMalloc((bi[m]+1)*sizeof(int),&bj);CHKERRQ(ierr);
    ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);

    ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
    ierr = PetscFree(len_r);CHKERRQ(ierr);

    /* allocate space for ba */
    ierr = PetscMalloc((bi[m]+1)*sizeof(MatScalar),&ba);CHKERRQ(ierr);
    ierr = PetscMemzero(ba,(bi[m]+1)*sizeof(MatScalar));CHKERRQ(ierr);

    /* put together B_seq */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,bi,bj,ba,&B_seq);CHKERRQ(ierr);
    ierr = PetscFree(ba);CHKERRQ(ierr); /* bi and bj are saved for reuse */

    /* create symbolic parallel matrix B_mpi by concatenating B_seq */
    /*--------------------------------------------------------------*/
    ierr = MatMerge(comm,B_seq,n,scall,&B_mpi);CHKERRQ(ierr);
  } /* end of if (scall == MAT_INITIAL_MATRIX) */

  /* insert mat values of B_mpi */
  /*----------------------------*/
  ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);

  /* set values of ba */
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj + bi[i];  /* col indices of the i-th row of B_seq */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    aa   = a->a + ai[arow];
    nextaj = 0;
    for (j=0; nextaj<anzi; j++){
      if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      /* i-th row */
      if (i == 0){
        anzi = *(ijbuf_r[k]+i);
        aj   = ijbuf_r[k] + m;
        aa   = abuf_r[k];
      } else {
        anzi = *(ijbuf_r[k]+i) - *(ijbuf_r[k]+i-1);
        aj   = ijbuf_r[k]+m + *(ijbuf_r[k]+i-1);
        aa   = abuf_r[k] + *(ijbuf_r[k]+i-1);
      }
      nextaj = 0;
      for (j=0; nextaj<anzi; j++){
        if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
          ba_i[j] += aa[nextaj++];
        }
      }
    }
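
    /*
       Added note (not in the original source): bj_i holds the merged column indices of every
       contribution to global row arow, so the two-index scans above (j over bj_i, nextaj over
       each contribution aj) visit every contributed entry and accumulate coincident entries by
       addition; the completed row is now inserted into B_mpi.
    */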

    ierr = MatSetValues(B_mpi,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B_mpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B_mpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);

  /* attach the supporting struct to B_mpi for reuse */
  /*-------------------------------------------------*/
  if (scall == MAT_INITIAL_MATRIX){
    B_mpi->spptr        = (void*)merge;
    B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
    merge->bi           = bi;
    merge->bj           = bj;
    merge->ijbuf_r      = ijbuf_r;
    merge->C_seq        = seqmat;

    *mpimat = B_mpi;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMat"
/*@C
     MatGetLocalMat - Creates a SeqAIJ matrix by taking all the local rows of A

    Collective on Mat

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or PETSC_NULL)

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

@*/
PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  int            *idx,i,start,end,ncols,nzA,nzB,*cmap,imark;
  IS             isrowa,iscola;
  Mat            *aloc;

  PetscFunctionBegin;
  if (!row){
    start = a->rstart; end = a->rend;
    ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col){
    start = a->cstart;
    cmap  = a->garray;
    nzA   = a->A->n;
    nzB   = a->B->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(int),&idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX){
    ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr = PetscFree(aloc);CHKERRQ(ierr);
  if (!row){
    ierr = ISDestroy(isrowa);CHKERRQ(ierr);
  }
  if (!col){
    ierr = ISDestroy(iscola);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
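
/*
   Usage sketch (added comment, not part of the original source; the matrix A and the error
   handling follow the usual PETSc conventions, names are illustrative):

      Mat A_loc;
      ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
      ...                                      /* A changes numerically, structure unchanged */
      ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
      ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/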

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)

   Output Parameters:
+    rowb, colb - index sets of rows and columns of B to extract
.    brstart - row index of B_seq at which the next B->m rows are taken from B's local rows
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
  PetscErrorCode ierr;
  int            *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq;

  PetscFunctionBegin;
  if (a->cstart != b->rstart || a->cend != b->rend){
    SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend);
  }

  if (scall == MAT_INITIAL_MATRIX){
    start = a->cstart;
    cmap  = a->garray;
    nzA   = a->A->n;
    nzB   = a->B->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(int),&idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
    *brstart = imark;
    ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb){
    ierr = ISDestroy(isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb){
    ierr = ISDestroy(iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }

  PetscFunctionReturn(0);
}
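
/*
   Usage sketch (added comment, not part of the original source; A and B are assumed to be
   MPIAIJ matrices with A's column layout matching B's row layout, names are illustrative):

      IS       rowb = PETSC_NULL,colb = PETSC_NULL;
      PetscInt brstart;
      Mat      B_seq;

      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
      ...                                      /* B changes numerically, structure unchanged */
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
      ierr = ISDestroy(rowb);CHKERRQ(ierr);
      ierr = ISDestroy(colb);CHKERRQ(ierr);
      ierr = MatDestroy(B_seq);CHKERRQ(ierr);
*/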