#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/inline/spops.h"

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is defined this is scalable
  at a slightly higher hash-table cost; without it, it is not scalable
  (each process stores an order-N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  int            n = aij->B->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
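/*
  Lookup sketch (illustration only, mirroring the uses in MatSetValues and
  MatGetValues below): translating a global column index gcol (a name used
  only in this sketch) into its local index in the off-diagonal block B.
  Entries are stored shifted by one so that a result of 0 can mean "not
  present".

     #if defined (PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&col);CHKERRQ(ierr);
       col--;                        -- col is -1 if gcol is absent from B
     #else
       col = aij->colmap[gcol] - 1;  -- likewise
     #endif
*/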
#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col1) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix",row,col); \
    if (nrow >= rmax) { \
      /* there is no extra room in the row, therefore enlarge */ \
      int         new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix",row,col); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow,len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow,len*sizeof(PetscScalar));CHKERRQ(ierr); \
 \
      /* free up old matrix storage */ \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col1) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix",row,col); \
    if (nrow >= rmax) { \
      /* there is no extra room in the row, therefore enlarge */ \
      int         new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix",row,col); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
      for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow,len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow,len*sizeof(PetscScalar));CHKERRQ(ierr); \
 \
      /* free up old matrix storage */ \
      ierr = PetscFree(b->a);CHKERRQ(ierr); \
      if (!b->singlemalloc) { \
        ierr = PetscFree(b->i);CHKERRQ(ierr); \
        ierr = PetscFree(b->j);CHKERRQ(ierr); \
      } \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = PETSC_TRUE; \
 \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow; \
}
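/*
  Insertion sketch (a summary of the macros above, not additional behavior):
  each row of a SeqAIJ block is kept sorted by column, so inserting
  (row,col,value) proceeds as

     1. a binary search narrows the range, then a short linear scan finds col;
     2. if col is present, the value is added or overwritten per InsertMode;
     3. otherwise the later entries are shifted up one slot and col inserted;
     4. if the row is full, the whole block is reallocated with CHUNKSIZE
        extra slots for this row (counted in reallocs), which is why accurate
        preallocation matters for performance.
*/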
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  int            i,j,rstart = aij->rstart,rend = aij->rend;
  int            cstart = aij->cstart,cend = aij->cend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some variables required by the macros above */
  Mat         A = aij->A;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)A->data;
  int         *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar *aa = a->a;
  PetscTruth  ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
  Mat         B = aij->B;
  Mat_SeqAIJ  *b = (Mat_SeqAIJ*)B->data;
  int         *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
  PetscScalar *ba = b->a;

  int         *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
  int         nonew = a->nonew,shift = 0;
  PetscScalar *ap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba    = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
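/*
  Usage sketch (illustration only, not part of this file's API): setting one
  row of a parallel AIJ matrix; rows outside the local ownership range are
  stashed by the routine above and communicated at assembly time.

     int         row = 0,cols[2];
     PetscScalar vals[2];
     cols[0] = 0;   cols[1] = 5;
     vals[0] = 1.0; vals[1] = -2.0;
     ierr = MatSetValues(A,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/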
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  int            i,j,rstart = aij->rstart,rend = aij->rend;
  int            cstart = aij->cstart,cend = aij->cend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
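/*
  Usage sketch (illustration only): retrieving locally owned entries; asking
  for a row owned by another process is an error for this implementation.

     int         row,col = 0,rstart,rend;
     PetscScalar val;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     row  = rstart;
     ierr = MatGetValues(mat,1,&row,1,&col,&val);CHKERRQ(ierr);
*/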
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  int            nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processes are in either INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)aij->A->data,*b = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  int            i,j,rstart,ncols,n,flg;
  int            *row,*col,other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Set all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /*
     Determine if any process has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble.

     If the nonzero structure of the submatrix B cannot change then we know
     that no process disassembled, and we can skip this step.
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }

  /* used by MatAXPY() */
  a->xtoy = 0; b->xtoy = 0;
  a->XtoY = 0; b->XtoY = 0;

  PetscFunctionReturn(0);
}
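/*
  Assembly sketch (illustration only): values destined for rows owned by
  other processes are collected in mat->stash by MatSetValues() and moved to
  their owners only inside the Begin/End pair above, so the pair must bracket
  all communication:

     ierr = MatSetValues(A,...);CHKERRQ(ierr);            -- any process, any rows
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     -- the matrix is now usable by MatMult() etc.
*/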
#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  int            i,N,*rows,*owners = l->rowners,size = l->size;
  int            *nprocs,j,idx,nsends,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,rstart = l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1]; }

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2]; }
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2]; }
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory and thus cause many mallocs,
     slowing things down.

     Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
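/*
  Usage sketch (illustration only): zeroing a set of global rows and placing
  2.0 on the diagonal; the rows listed may be owned by any process, since the
  routine above routes them to their owners itself.

     IS          is;
     int         rows[1];
     PetscScalar diag = 2.0;
     rows[0] = 0;
     ierr = ISCreateGeneral(PETSC_COMM_SELF,1,rows,&is);CHKERRQ(ierr);
     ierr = MatZeroRows(A,is,&diag);CHKERRQ(ierr);
     ierr = ISDestroy(is);CHKERRQ(ierr);
*/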
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  int            nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /*
     receive remote parts: note this assumes the values are not actually
     inserted into yy until the VecScatterEnd() below, which holds for this
     implementation but is perhaps not always true.
  */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
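/*
  Structure note (a sketch of the algorithm above, not new behavior): with
  the local rows split into the diagonal block A_d and the column-compressed
  off-diagonal block B_o (columns mapped through garray), the parallel
  product is

     y_local = A_d * x_local + B_o * x_ghost

  where x_ghost (lvec) is gathered from other processes by the Mvctx scatter;
  the scatter is started before the A_d multiply so that communication
  overlaps computation.
*/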
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)Amat->data,*Bij;
  Mat            Adia = Aij->A,Bdia,Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  int            M,N,first,last,*notme,ntids,i;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*)Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&ntids);CHKERRQ(ierr);
  if (ntids == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(int),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<N; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /*
     receive remote parts: note this assumes the values are not actually
     inserted into zz until the VecScatterEnd() below, which holds for this
     implementation but is perhaps not always true.
  */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
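/*
  Usage sketch (illustration only; note this PETSc version passes the scalar
  by pointer): scaling every entry of the matrix by 0.5.

     PetscScalar alpha = 0.5;
     ierr = MatScale(&alpha,A);CHKERRQ(ierr);
*/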
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
  PetscScalar    *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->M;
    header[2] = mat->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen = mat->m;
    ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs enough space for the largest nonzero count on any process */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",rnz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",rnz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
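/*
  File layout note (derived from the routine above): the binary format written
  is

     header[4]           = {MAT_FILE_COOKIE, M, N, total nz}
     row_lengths[M]      = nonzeros per global row
     column_indices[nz]  = global column numbers, row by row, ascending
     values[nz]          = matching numerical values

  all gathered to and written by process 0.
*/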
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,flg,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
      if (flg) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
                                                  rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
                                                  rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    if (!rank) {
      ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = aij->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
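/*
  Usage sketch (illustration only): viewing an assembled parallel matrix; the
  viewer type selects which branch of the routines above runs.

     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);   -- ASCII
     ierr = MatView(A,PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr);     -- graphics
*/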
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1;
  PetscScalar    mone = -1.0;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  info->columns_local     = (double)matin->N;

  PetscFunctionReturn(0);
}
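/*
  Usage sketch (illustration only): querying aggregate storage statistics.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     -- info.nz_used, info.nz_allocated and info.mallocs now hold sums over
     -- all processes; MAT_LOCAL and MAT_GLOBAL_MAX are also available
*/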
#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}
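/*
  Usage sketch for the row access routines implemented below (illustration
  only): only locally owned rows may be requested, and each MatGetRow() must
  be paired with a MatRestoreRow().

     int         rstart,rend,ncols,*cols,row;
     PetscScalar *vals;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       -- use cols[0..ncols-1] (global indices, ascending) and vals --
       ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/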
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  int            i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  int            nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  int            *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /* allocate enough space to hold information from the longest row */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    int        max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  int            i,j,cstart = aij->cstart;
  PetscReal      sum = 0.0;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      int       *jj,*garray = aij->garray;
      ierr  = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr  = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->m; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
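/*
  Norms computed above (a summary of the code, not new behavior): the local
  contributions from the A and B blocks are combined with MPI_Allreduce:

     NORM_FROBENIUS:  sqrt( sum_ij |a_ij|^2 )
     NORM_1:          max_j sum_i |a_ij|   (max column sum)
     NORM_INFINITY:   max_i sum_j |a_ij|   (max row sum)

  The two norm would require an eigenvalue computation and is not supported.
*/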
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
  PetscErrorCode ierr;
  int            M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat            B;
  PetscScalar    *array;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  }

  ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*)a->A->data;
  m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for (i=0; i<ai[m]; i++) {aj[i] += a->cstart;}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*)a->B->data;
  m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row  = a->rstart;
  ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr);
  ct   = cols;
  for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  ierr = PetscFree(ct);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a = aij->A,b = aij->B;
  PetscErrorCode ierr;
  int            s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1 != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1 != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatPrintHelp_MPIAIJ"
PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPIAIJ"
PetscErrorCode MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIAIJ"
PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /*
       Because of the column compression in the off-processor part of the
       matrix a->B, the number of columns in a->B and b->B may differ, so we
       cannot call MatCopy() directly on the two parts. If need be, a more
       efficient copy than MatCopy_Basic() could be provided by first
       uncompressing the a->B matrices and then copying the submatrices.
    */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
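/*
  Usage sketch for the Y = a*X + Y implementation below (illustration only;
  note this PETSc version passes the scalar by pointer):

     PetscScalar alpha = 2.0;
     ierr = MatAXPY(&alpha,X,Y,SAME_NONZERO_PATTERN);CHKERRQ(ierr);

  SAME_NONZERO_PATTERN reduces to a single BLAS axpy over the stored values;
  SUBSET_NONZERO_PATTERN builds (and caches) an index map from X's nonzeros
  into Y's; anything else falls back to MatAXPY_Basic().
*/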
#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
{
  PetscErrorCode ierr;
  int            i;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
  PetscBLASInt   bnz,one = 1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    x   = (Mat_SeqAIJ*)xx->A->data;
    y   = (Mat_SeqAIJ*)yy->A->data;
    bnz = (PetscBLASInt)x->nz;
    BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
    x   = (Mat_SeqAIJ*)xx->B->data;
    y   = (Mat_SeqAIJ*)yy->B->data;
    bnz = (PetscBLASInt)x->nz;
    BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
  } else if (str == SUBSET_NONZERO_PATTERN) {
    ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);

    x = (Mat_SeqAIJ*)xx->B->data;
    y = (Mat_SeqAIJ*)yy->B->data;
    if (y->xtoy && y->XtoY != xx->B) {
      ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
      ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
    }
    if (!y->xtoy) { /* get xtoy */
      ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
      y->XtoY = xx->B;
    }
    for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
  } else {
    ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
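/*
  Dispatch note (a sketch of the mechanism, not new behavior): the table
  below wires the MPIAIJ routines into the generic Mat function-pointer
  dispatch, so a call such as MatMult(mat,x,y) reaches MatMult_MPIAIJ() via

     ierr = (*mat->ops->mult)(mat,x,y);CHKERRQ(ierr);

  mirroring how this file itself invokes the sequential blocks through
  a->A->ops. Zero entries mark operations with no MPIAIJ implementation.
*/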
MatDestroy_MPIAIJ, 1657 MatView_MPIAIJ, 1658 MatGetPetscMaps_Petsc, 1659 0, 1660 /*65*/ 0, 1661 0, 1662 0, 1663 0, 1664 0, 1665 /*70*/ 0, 1666 0, 1667 MatSetColoring_MPIAIJ, 1668 MatSetValuesAdic_MPIAIJ, 1669 MatSetValuesAdifor_MPIAIJ, 1670 /*75*/ 0, 1671 0, 1672 0, 1673 0, 1674 0, 1675 /*80*/ 0, 1676 0, 1677 0, 1678 0, 1679 /*84*/ MatLoad_MPIAIJ, 1680 0, 1681 0, 1682 0, 1683 0, 1684 0, 1685 /*90*/ MatMatMult_MPIAIJ_MPIAIJ, 1686 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1687 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1688 MatPtAP_MPIAIJ_MPIAIJ, 1689 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 1690 /*95*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 1691 0, 1692 0, 1693 0}; 1694 1695 /* ----------------------------------------------------------------------------------------*/ 1696 1697 EXTERN_C_BEGIN 1698 #undef __FUNCT__ 1699 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1700 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 1701 { 1702 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1703 PetscErrorCode ierr; 1704 1705 PetscFunctionBegin; 1706 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1707 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1708 PetscFunctionReturn(0); 1709 } 1710 EXTERN_C_END 1711 1712 EXTERN_C_BEGIN 1713 #undef __FUNCT__ 1714 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1715 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 1716 { 1717 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1718 PetscErrorCode ierr; 1719 1720 PetscFunctionBegin; 1721 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1722 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1723 PetscFunctionReturn(0); 1724 } 1725 EXTERN_C_END 1726 1727 #include "petscpc.h" 1728 EXTERN_C_BEGIN 1729 #undef __FUNCT__ 1730 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1731 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 1732 { 1733 Mat_MPIAIJ *b; 1734 PetscErrorCode ierr; 1735 int i; 1736 1737 PetscFunctionBegin; 1738 B->preallocated = PETSC_TRUE; 1739 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1740 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1741 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1742 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1743 if (d_nnz) { 1744 for (i=0; i<B->m; i++) { 1745 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 1746 } 1747 } 1748 if (o_nnz) { 1749 for (i=0; i<B->m; i++) { 1750 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 1751 } 1752 } 1753 b = (Mat_MPIAIJ*)B->data; 1754 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1755 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1756 1757 PetscFunctionReturn(0); 1758 } 1759 EXTERN_C_END 1760 1761 /*MC 1762 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 1763 1764 Options Database Keys: 1765 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 1766 1767 Level: beginner 1768 1769 .seealso: MatCreateMPIAIJ 1770 M*/ 1771 1772 EXTERN_C_BEGIN 1773 #undef __FUNCT__ 1774 #define __FUNCT__ "MatCreate_MPIAIJ" 1775 PetscErrorCode MatCreate_MPIAIJ(Mat B) 1776 { 1777 Mat_MPIAIJ *b; 1778 PetscErrorCode ierr; 1779 int i,size; 1780 1781 PetscFunctionBegin; 1782 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1783 1784 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1785 B->data = (void*)b; 1786 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1787 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1788 B->factor = 0; 1789 B->assembled = PETSC_FALSE; 1790 B->mapping = 0; 1791 1792 B->insertmode = NOT_SET_VALUES; 1793 b->size = size; 1794 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1795 1796 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1797 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1798 1799 /* the information in the maps duplicates the information computed below, eventually 1800 we should remove the duplicate information that is not contained in the maps */ 1801 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1802 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1803 1804 /* build local table of row and column ownerships */ 1805 ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1806 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1807 b->cowners = b->rowners + b->size + 2; 1808 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1809 b->rowners[0] = 0; 1810 for (i=2; i<=b->size; i++) { 1811 b->rowners[i] += b->rowners[i-1]; 1812 } 1813 b->rstart = b->rowners[b->rank]; 1814 b->rend = b->rowners[b->rank+1]; 1815 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1816 b->cowners[0] = 0; 1817 for (i=2; i<=b->size; i++) { 1818 b->cowners[i] += b->cowners[i-1]; 1819 } 1820 b->cstart = b->cowners[b->rank]; 1821 b->cend = b->cowners[b->rank+1]; 1822 1823 /* build cache for off array entries formed */ 1824 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1825 b->donotstash = PETSC_FALSE; 1826 b->colmap = 0; 1827 b->garray = 0; 1828 b->roworiented = PETSC_TRUE; 1829 1830 /* stuff used for matrix vector multiply */ 1831 b->lvec = PETSC_NULL; 1832 b->Mvctx = PETSC_NULL; 1833 1834 /* stuff for MatGetRow() */ 1835 b->rowindices = 0; 1836 b->rowvalues = 0; 1837 b->getrowactive = PETSC_FALSE; 1838 1839 /* Explicitly create 2 MATSEQAIJ matrices. 
*/ 1840 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 1841 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 1842 PetscLogObjectParent(B,b->A); 1843 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 1844 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 1845 PetscLogObjectParent(B,b->B); 1846 1847 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1848 "MatStoreValues_MPIAIJ", 1849 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1850 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1851 "MatRetrieveValues_MPIAIJ", 1852 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1853 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1854 "MatGetDiagonalBlock_MPIAIJ", 1855 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1856 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 1857 "MatIsTranspose_MPIAIJ", 1858 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 1859 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 1860 "MatMPIAIJSetPreallocation_MPIAIJ", 1861 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 1862 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 1863 "MatDiagonalScaleLocal_MPIAIJ", 1864 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 1865 PetscFunctionReturn(0); 1866 } 1867 EXTERN_C_END 1868 1869 /*MC 1870 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 1871 1872 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 1873 and MATMPIAIJ otherwise. As a result, for single process communicators, 1874 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 1875 for communicators controlling multiple processes. It is recommended that you call both of 1876 the above preallocation routines for simplicity. 1877 1878 Options Database Keys: 1879 . 
-mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 1880 1881 Level: beginner 1882 1883 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ 1884 M*/ 1885 1886 EXTERN_C_BEGIN 1887 #undef __FUNCT__ 1888 #define __FUNCT__ "MatCreate_AIJ" 1889 PetscErrorCode MatCreate_AIJ(Mat A) 1890 { 1891 PetscErrorCode ierr; 1892 int size; 1893 1894 PetscFunctionBegin; 1895 ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr); 1896 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1897 if (size == 1) { 1898 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 1899 } else { 1900 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1901 } 1902 PetscFunctionReturn(0); 1903 } 1904 EXTERN_C_END 1905 1906 #undef __FUNCT__ 1907 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1908 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1909 { 1910 Mat mat; 1911 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1912 PetscErrorCode ierr; 1913 1914 PetscFunctionBegin; 1915 *newmat = 0; 1916 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1917 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1918 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1919 a = (Mat_MPIAIJ*)mat->data; 1920 1921 mat->factor = matin->factor; 1922 mat->assembled = PETSC_TRUE; 1923 mat->insertmode = NOT_SET_VALUES; 1924 mat->preallocated = PETSC_TRUE; 1925 1926 a->rstart = oldmat->rstart; 1927 a->rend = oldmat->rend; 1928 a->cstart = oldmat->cstart; 1929 a->cend = oldmat->cend; 1930 a->size = oldmat->size; 1931 a->rank = oldmat->rank; 1932 a->donotstash = oldmat->donotstash; 1933 a->roworiented = oldmat->roworiented; 1934 a->rowindices = 0; 1935 a->rowvalues = 0; 1936 a->getrowactive = PETSC_FALSE; 1937 1938 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1939 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1940 if (oldmat->colmap) { 1941 #if defined (PETSC_USE_CTABLE) 1942 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1943 #else 1944 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1945 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1946 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1947 #endif 1948 } else a->colmap = 0; 1949 if (oldmat->garray) { 1950 int len; 1951 len = oldmat->B->n; 1952 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1953 PetscLogObjectMemory(mat,len*sizeof(int)); 1954 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1955 } else a->garray = 0; 1956 1957 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1958 PetscLogObjectParent(mat,a->lvec); 1959 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1960 PetscLogObjectParent(mat,a->Mvctx); 1961 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1962 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1963 PetscLogObjectParent(mat,a->A); 1964 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1965 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1966 PetscLogObjectParent(mat,a->B); 1967 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1968 *newmat = mat; 1969 PetscFunctionReturn(0); 1970 } 1971 1972 #include "petscsys.h" 1973 1974 #undef __FUNCT__ 1975 #define __FUNCT__ "MatLoad_MPIAIJ" 1976 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 1977 { 1978 Mat A; 1979 PetscScalar *vals,*svals; 1980 
MPI_Comm comm = ((PetscObject)viewer)->comm;
1981 MPI_Status status;
1982 PetscErrorCode ierr;
1983 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag;
1984 int i,nz,j,rstart,rend,fd;
1985 int header[4],*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1986 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1987 int cend,cstart,n;
1988
1989 PetscFunctionBegin;
1990 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1991 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1992 if (!rank) {
1993 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1994 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1995 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1996 if (header[3] < 0) {
1997 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
1998 }
1999 }
2000
2001 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2002 M = header[1]; N = header[2];
2003 /* determine ownership of all rows */
2004 m = M/size + ((M % size) > rank);
2005 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2006 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2007 rowners[0] = 0;
2008 for (i=2; i<=size; i++) {
2009 rowners[i] += rowners[i-1];
2010 }
2011 rstart = rowners[rank];
2012 rend = rowners[rank+1];
2013
2014 /* distribute row lengths to all processors */
2015 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
2016 offlens = ourlens + (rend-rstart);
2017 if (!rank) {
2018 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
2019 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2020 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2021 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
2022 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
2023 ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2024 } else {
2025 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
2026 }
2027
2028 if (!rank) {
2029 /* calculate the number of nonzeros on each processor */
2030 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2031 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2032 for (i=0; i<size; i++) {
2033 for (j=rowners[i]; j< rowners[i+1]; j++) {
2034 procsnz[i] += rowlengths[j];
2035 }
2036 }
2037 ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2038
2039 /* determine max buffer needed and allocate it */
2040 maxnz = 0;
2041 for (i=0; i<size; i++) {
2042 maxnz = PetscMax(maxnz,procsnz[i]);
2043 }
2044 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2045
2046 /* read in my part of the matrix column indices */
2047 nz = procsnz[0];
2048 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
2049 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2050
2051 /* read in everyone else's part and ship it off */
2052 for (i=1; i<size; i++) {
2053 nz = procsnz[i];
2054 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2055 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2056 }
2057 ierr = PetscFree(cols);CHKERRQ(ierr);
2058 } else {
2059 /* determine buffer space needed for message */
2060 nz = 0;
2061 for (i=0; i<m; i++) {
2062 nz += ourlens[i];
2063 }
2064 ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
2065
2066 /* receive message of column indices */
2067 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2068 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2069 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2070 }
2071
2072 /* determine column ownership if matrix is not square */
2073 if (N != M) {
2074 n = N/size + ((N % size) > rank);
2075 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2076 cstart = cend - n;
2077 } else {
2078 cstart = rstart;
2079 cend = rend;
2080 n = cend - cstart;
2081 }
2082
2083 /* loop over local rows, determining number of off diagonal entries */
2084 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
2085 jj = 0;
2086 for (i=0; i<m; i++) {
2087 for (j=0; j<ourlens[i]; j++) {
2088 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2089 jj++;
2090 }
2091 }
2092
2093 /* create our matrix */
2094 for (i=0; i<m; i++) {
2095 ourlens[i] -= offlens[i];
2096 }
2097 ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
2098 ierr = MatSetType(A,type);CHKERRQ(ierr);
2099 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2100
2101 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2102 for (i=0; i<m; i++) {
2103 ourlens[i] += offlens[i];
2104 }
2105
2106 if (!rank) {
2107 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2108
2109 /* read in my part of the matrix numerical values */
2110 nz = procsnz[0];
2111 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2112
2113 /* insert into matrix */
2114 jj = rstart;
2115 smycols = mycols;
2116 svals = vals;
2117 for (i=0; i<m; i++) {
2118 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2119 smycols += ourlens[i];
2120 svals += ourlens[i];
2121 jj++;
2122 }
2123
2124 /* read in the other processors' parts and ship them out */
2125 for (i=1; i<size; i++) {
2126 nz = procsnz[i];
2127 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2128 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2129 }
2130 ierr = PetscFree(procsnz);CHKERRQ(ierr);
2131 } else {
2132 /* receive numeric values */
2133 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2134
2135 /* receive message of values */
2136 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2137 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2138 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2139
2140 /* insert into matrix */
2141 jj = rstart;
2142 smycols = mycols;
2143 svals = vals;
2144 for (i=0; i<m; i++) {
2145 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2146 smycols += ourlens[i];
2147 svals += ourlens[i];
2148 jj++;
2149 }
2150 }
2151 ierr = PetscFree(ourlens);CHKERRQ(ierr);
2152 ierr = PetscFree(vals);CHKERRQ(ierr);
2153 ierr = PetscFree(mycols);CHKERRQ(ierr);
2154 ierr = PetscFree(rowners);CHKERRQ(ierr);
2155
2156 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2157 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2158 *newmat = A;
2159 PetscFunctionReturn(0);
2160 }
2161
2162 #undef __FUNCT__
2163 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2164 /*
2165 Not great since it makes two copies of the submatrix: first a SeqAIJ copy
2166 on each process, and then the final result formed by concatenating the local matrices.
2167 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2168 */ 2169 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 2170 { 2171 PetscErrorCode ierr; 2172 PetscMPIInt rank,size; 2173 int i,m,n,rstart,row,rend,nz,*cwork,j; 2174 int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2175 Mat *local,M,Mreuse; 2176 PetscScalar *vwork,*aa; 2177 MPI_Comm comm = mat->comm; 2178 Mat_SeqAIJ *aij; 2179 2180 2181 PetscFunctionBegin; 2182 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2183 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2184 2185 if (call == MAT_REUSE_MATRIX) { 2186 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2187 if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse"); 2188 local = &Mreuse; 2189 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2190 } else { 2191 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2192 Mreuse = *local; 2193 ierr = PetscFree(local);CHKERRQ(ierr); 2194 } 2195 2196 /* 2197 m - number of local rows 2198 n - number of columns (same on all processors) 2199 rstart - first row in new global matrix generated 2200 */ 2201 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2202 if (call == MAT_INITIAL_MATRIX) { 2203 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2204 ii = aij->i; 2205 jj = aij->j; 2206 2207 /* 2208 Determine the number of non-zeros in the diagonal and off-diagonal 2209 portions of the matrix in order to do correct preallocation 2210 */ 2211 2212 /* first get start and end of "diagonal" columns */ 2213 if (csize == PETSC_DECIDE) { 2214 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2215 if (mglobal == n) { /* square matrix */ 2216 nlocal = m; 2217 } else { 2218 nlocal = n/size + ((n % size) > rank); 2219 } 2220 } else { 2221 nlocal = csize; 2222 } 2223 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2224 rstart = rend - nlocal; 2225 if (rank == size - 1 && rend != n) { 2226 SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n); 2227 } 2228 2229 /* next, compute all the lengths */ 2230 ierr = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2231 olens = dlens + m; 2232 for (i=0; i<m; i++) { 2233 jend = ii[i+1] - ii[i]; 2234 olen = 0; 2235 dlen = 0; 2236 for (j=0; j<jend; j++) { 2237 if (*jj < rstart || *jj >= rend) olen++; 2238 else dlen++; 2239 jj++; 2240 } 2241 olens[i] = olen; 2242 dlens[i] = dlen; 2243 } 2244 ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr); 2245 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2246 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2247 ierr = PetscFree(dlens);CHKERRQ(ierr); 2248 } else { 2249 int ml,nl; 2250 2251 M = *newmat; 2252 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2253 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2254 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2255 /* 2256 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2257 rather than the slower MatSetValues(). 
2258 */
2259 M->was_assembled = PETSC_TRUE;
2260 M->assembled = PETSC_FALSE;
2261 }
2262 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2263 aij = (Mat_SeqAIJ*)(Mreuse)->data;
2264 ii = aij->i;
2265 jj = aij->j;
2266 aa = aij->a;
2267 for (i=0; i<m; i++) {
2268 row = rstart + i;
2269 nz = ii[i+1] - ii[i];
2270 cwork = jj; jj += nz;
2271 vwork = aa; aa += nz;
2272 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2273 }
2274
2275 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2276 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2277 *newmat = M;
2278
2279 /* save submatrix used in processor for next request */
2280 if (call == MAT_INITIAL_MATRIX) {
2281 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2282 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2283 }
2284
2285 PetscFunctionReturn(0);
2286 }
2287
2288 #undef __FUNCT__
2289 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2290 /*@C
2291 MatMPIAIJSetPreallocation - Preallocates storage for a sparse parallel matrix in AIJ format
2292 (the default parallel PETSc format). For good matrix assembly performance
2293 the user should preallocate the matrix storage by setting the parameters
2294 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2295 performance can be increased by more than a factor of 50.
2296
2297 Collective on MPI_Comm
2298
2299 Input Parameters:
2300 + B - the matrix
2301 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
2302 (same value is used for all local rows)
2303 . d_nnz - array containing the number of nonzeros in the various rows of the
2304 DIAGONAL portion of the local submatrix (possibly different for each row)
2305 or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2306 The size of this array is equal to the number of local rows, i.e., 'm'.
2307 You must leave room for the diagonal entry even if it is zero.
2308 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
2309 submatrix (same value is used for all local rows).
2310 - o_nnz - array containing the number of nonzeros in the various rows of the
2311 OFF-DIAGONAL portion of the local submatrix (possibly different for
2312 each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2313 structure. The size of this array is equal to the number
2314 of local rows, i.e., 'm'.
2315
2316 Notes: The AIJ format (also called the Yale sparse matrix format or
2317 compressed row storage) is fully compatible with standard Fortran 77
2318 storage. That is, the stored row and column indices can begin at
2319 either one (as in Fortran) or zero. See the users manual for details.
2320
2321 The user MUST specify either the local or global matrix dimensions
2322 (possibly both).
2323
2324 The parallel matrix is partitioned such that the first m0 rows belong to
2325 process 0, the next m1 rows belong to process 1, the next m2 rows belong
2326 to process 2, etc., where m0,m1,m2,... are the values of the input parameter 'm'.
2327
2328 The DIAGONAL portion of the local submatrix of a processor can be defined
2329 as the submatrix obtained by extracting the part corresponding
2330 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2331 first row that belongs to the processor, and r2 is the last row belonging
2332 to this processor. This is a square mxm matrix. The remaining portion
2333 of the local submatrix (m x (N-m)) constitutes the OFF-DIAGONAL portion.
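   For illustration (a sketch added here, not a normative recipe; B is the
   matrix being preallocated), the row range r1 to r2, and hence the column
   range of the DIAGONAL block for this layout, can be queried with
.vb
      int rstart,rend;
      ierr = MatGetOwnershipRange(B,&rstart,&rend);CHKERRQ(ierr);
      /* the DIAGONAL block covers global rows/columns rstart to rend-1 */
.ve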
2334
2335 If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.
2336
2337 By default, this format uses inodes (identical nodes) when possible.
2338 We search for consecutive rows with the same nonzero structure, thereby
2339 reusing matrix information to achieve increased efficiency.
2340
2341 Options Database Keys:
2342 + -mat_aij_no_inode - Do not use inodes
2343 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2344 - -mat_aij_oneindex - Internally use indexing starting at 1
2345 rather than 0. Note that when calling MatSetValues(),
2346 the user still MUST index entries starting at 0!
2347
2348 Example usage:
2349
2350 Consider the following 8x8 matrix with 34 non-zero values that is
2351 assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2352 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
2353 as follows:
2354
2355 .vb
2356 1 2 0 | 0 3 0 | 0 4
2357 Proc0 0 5 6 | 7 0 0 | 8 0
2358 9 0 10 | 11 0 0 | 12 0
2359 -------------------------------------
2360 13 0 14 | 15 16 17 | 0 0
2361 Proc1 0 18 0 | 19 20 21 | 0 0
2362 0 0 0 | 22 23 0 | 24 0
2363 -------------------------------------
2364 Proc2 25 26 27 | 0 0 28 | 29 0
2365 30 0 0 | 31 32 33 | 0 34
2366 .ve
2367
2368 This can be represented as a collection of submatrices as:
2369
2370 .vb
2371 A B C
2372 D E F
2373 G H I
2374 .ve
2375
2376 Here the submatrices A,B,C are owned by proc0, D,E,F are
2377 owned by proc1, and G,H,I are owned by proc2.
2378
2379 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2380 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2381 The 'M','N' parameters are 8,8, and have the same values on all procs.
2382
2383 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2384 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2385 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2386 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2387 part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ
2388 matrix and [DF] as another SeqAIJ matrix.
2389
2390 When the d_nz, o_nz parameters are specified, d_nz storage elements are
2391 allocated for every row of the local diagonal submatrix, and o_nz
2392 storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2393 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2394 row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2395 In this case, the values of d_nz,o_nz are:
2396 .vb
2397 proc0 : d_nz = 2, o_nz = 2
2398 proc1 : d_nz = 3, o_nz = 2
2399 proc2 : d_nz = 1, o_nz = 4
2400 .ve
2401 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2402 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2403 for proc2, i.e., we are using 12+15+10=37 storage locations to store
2404 34 values.
2405
2406 When the d_nnz, o_nnz parameters are specified, the storage is specified
2407 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2408 In the above case the values for d_nnz,o_nnz are:
2409 .vb
2410 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2411 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2412 proc2: d_nnz = [1,1] and o_nnz = [4,4]
2413 .ve
2414 Here the space allocated is the sum of all the above values, i.e., 34, and
2415 hence the preallocation is perfect.
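   As a sketch of the corresponding call (an illustration added here, using the
   d_nnz/o_nnz values above), proc0 could preallocate its three rows with
.vb
      int d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
      ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve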
2416
2417 Level: intermediate
2418
2419 .keywords: matrix, aij, compressed row, sparse, parallel
2420
2421 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2422 @*/
2423 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
2424 {
2425 PetscErrorCode ierr,(*f)(Mat,int,const int[],int,const int[]);
2426
2427 PetscFunctionBegin;
2428 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2429 if (f) {
2430 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2431 }
2432 PetscFunctionReturn(0);
2433 }
2434
2435 #undef __FUNCT__
2436 #define __FUNCT__ "MatCreateMPIAIJ"
2437 /*@C
2438 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2439 (the default parallel PETSc format). For good matrix assembly performance
2440 the user should preallocate the matrix storage by setting the parameters
2441 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2442 performance can be increased by more than a factor of 50.
2443
2444 Collective on MPI_Comm
2445
2446 Input Parameters:
2447 + comm - MPI communicator
2448 . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2449 This value should be the same as the local size used in creating the
2450 y vector for the matrix-vector product y = Ax.
2451 . n - number of local columns (or PETSC_DECIDE to have it calculated if N is given).
2452 This value should be the same as the local size used in creating the
2453 x vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
2454 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2455 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2456 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
2457 (same value is used for all local rows)
2458 . d_nnz - array containing the number of nonzeros in the various rows of the
2459 DIAGONAL portion of the local submatrix (possibly different for each row)
2460 or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2461 The size of this array is equal to the number of local rows, i.e., 'm'.
2462 You must leave room for the diagonal entry even if it is zero.
2463 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
2464 submatrix (same value is used for all local rows).
2465 - o_nnz - array containing the number of nonzeros in the various rows of the
2466 OFF-DIAGONAL portion of the local submatrix (possibly different for
2467 each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2468 structure. The size of this array is equal to the number
2469 of local rows, i.e., 'm'.
2470
2471 Output Parameter:
2472 . A - the matrix
2473
2474 Notes:
2475 The m,n,M,N parameters specify the size of the matrix and its partitioning across
2476 processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2477 storage requirements for this matrix.
2478
2479 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2480 processor then it must be used on all processors that share the object for
2481 that argument.
2482
2483 The AIJ format (also called the Yale sparse matrix format or
2484 compressed row storage) is fully compatible with standard Fortran 77
2485 storage. That is, the stored row and column indices can begin at
2486 either one (as in Fortran) or zero. See the users manual for details.
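   A minimal calling sketch (an illustration added here; the communicator,
   sizes, and fill estimates are placeholders):
.vb
      Mat A;
      ierr = MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
      /* set entries with MatSetValues(), then call MatAssemblyBegin()/MatAssemblyEnd() */
.ve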
2487
2488 The user MUST specify either the local or global matrix dimensions
2489 (possibly both).
2490
2491 The parallel matrix is partitioned such that the first m0 rows belong to
2492 process 0, the next m1 rows belong to process 1, the next m2 rows belong
2493 to process 2, etc., where m0,m1,m2,... are the values of the input parameter 'm'.
2494
2495 The DIAGONAL portion of the local submatrix of a processor can be defined
2496 as the submatrix obtained by extracting the part corresponding
2497 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2498 first row that belongs to the processor, and r2 is the last row belonging
2499 to this processor. This is a square mxm matrix. The remaining portion
2500 of the local submatrix (m x (N-m)) constitutes the OFF-DIAGONAL portion.
2501
2502 If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.
2503
2504 When calling this routine with a single process communicator, a matrix of
2505 type MATSEQAIJ is returned. If a matrix of type MATMPIAIJ is desired for this
2506 type of communicator, use the construction mechanism:
2507 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2508
2509 By default, this format uses inodes (identical nodes) when possible.
2510 We search for consecutive rows with the same nonzero structure, thereby
2511 reusing matrix information to achieve increased efficiency.
2512
2513 Options Database Keys:
2514 + -mat_aij_no_inode - Do not use inodes
2515 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2516 - -mat_aij_oneindex - Internally use indexing starting at 1
2517 rather than 0. Note that when calling MatSetValues(),
2518 the user still MUST index entries starting at 0!
2519
2520
2521 Example usage:
2522
2523 Consider the following 8x8 matrix with 34 non-zero values that is
2524 assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2525 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
2526 as follows:
2527
2528 .vb
2529 1 2 0 | 0 3 0 | 0 4
2530 Proc0 0 5 6 | 7 0 0 | 8 0
2531 9 0 10 | 11 0 0 | 12 0
2532 -------------------------------------
2533 13 0 14 | 15 16 17 | 0 0
2534 Proc1 0 18 0 | 19 20 21 | 0 0
2535 0 0 0 | 22 23 0 | 24 0
2536 -------------------------------------
2537 Proc2 25 26 27 | 0 0 28 | 29 0
2538 30 0 0 | 31 32 33 | 0 34
2539 .ve
2540
2541 This can be represented as a collection of submatrices as:
2542
2543 .vb
2544 A B C
2545 D E F
2546 G H I
2547 .ve
2548
2549 Here the submatrices A,B,C are owned by proc0, D,E,F are
2550 owned by proc1, and G,H,I are owned by proc2.
2551
2552 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2553 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2554 The 'M','N' parameters are 8,8, and have the same values on all procs.
2555
2556 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2557 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2558 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2559 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2560 part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ
2561 matrix and [DF] as another SeqAIJ matrix.
2562
2563 When the d_nz, o_nz parameters are specified, d_nz storage elements are
2564 allocated for every row of the local diagonal submatrix, and o_nz
2565 storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2566 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2567 row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2568 In this case, the values of d_nz,o_nz are:
2569 .vb
2570 proc0 : d_nz = 2, o_nz = 2
2571 proc1 : d_nz = 3, o_nz = 2
2572 proc2 : d_nz = 1, o_nz = 4
2573 .ve
2574 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2575 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2576 for proc2, i.e., we are using 12+15+10=37 storage locations to store
2577 34 values.
2578
2579 When the d_nnz, o_nnz parameters are specified, the storage is specified
2580 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2581 In the above case the values for d_nnz,o_nnz are:
2582 .vb
2583 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2584 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2585 proc2: d_nnz = [1,1] and o_nnz = [4,4]
2586 .ve
2587 Here the space allocated is the sum of all the above values, i.e., 34, and
2588 hence the preallocation is perfect.
2589
2590 Level: intermediate
2591
2592 .keywords: matrix, aij, compressed row, sparse, parallel
2593
2594 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2595 @*/
2596 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2597 {
2598 PetscErrorCode ierr;
2599 int size;
2600
2601 PetscFunctionBegin;
2602 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2603 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2604 if (size > 1) {
2605 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2606 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2607 } else {
2608 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2609 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2610 }
2611 PetscFunctionReturn(0);
2612 }
2613
2614 #undef __FUNCT__
2615 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2616 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
2617 {
2618 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2619 PetscFunctionBegin;
2620 *Ad = a->A;
2621 *Ao = a->B;
2622 *colmap = a->garray;
2623 PetscFunctionReturn(0);
2624 }
2625
2626 #undef __FUNCT__
2627 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2628 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2629 {
2630 PetscErrorCode ierr;
2631 int i;
2632 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2633
2634 PetscFunctionBegin;
2635 if (coloring->ctype == IS_COLORING_LOCAL) {
2636 ISColoringValue *allcolors,*colors;
2637 ISColoring ocoloring;
2638
2639 /* set coloring for diagonal portion */
2640 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2641
2642 /* set coloring for off-diagonal portion */
2643 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2644 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2645 for (i=0; i<a->B->n; i++) {
2646 colors[i] = allcolors[a->garray[i]];
2647 }
2648 ierr = PetscFree(allcolors);CHKERRQ(ierr);
2649 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2650 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2651 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2652 } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2653 ISColoringValue *colors;
2654 int *larray;
2655 ISColoring ocoloring;
2656
2657 /* set coloring for diagonal portion */
2658 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2659 for (i=0; i<a->A->n; i++) {
2660 larray[i] = i + a->cstart;
2661 }
2662 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2663 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2664 for (i=0; i<a->A->n; i++) {
2665 colors[i] = coloring->colors[larray[i]];
2666 }
2667 ierr = PetscFree(larray);CHKERRQ(ierr);
2668 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2669 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2670 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2671
2672 /* set coloring for off-diagonal portion */
2673 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2674 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2675 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2676 for (i=0; i<a->B->n; i++) {
2677 colors[i] = coloring->colors[larray[i]];
2678 }
2679 ierr = PetscFree(larray);CHKERRQ(ierr);
2680 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2681 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2682 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2683 } else {
2684 SETERRQ1(1,"No support for ISColoringType %d",coloring->ctype);
2685 }
2686
2687 PetscFunctionReturn(0);
2688 }
2689
2690 #undef __FUNCT__
2691 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2692 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2693 {
2694 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2695 PetscErrorCode ierr;
2696
2697 PetscFunctionBegin;
2698 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2699 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2700 PetscFunctionReturn(0);
2701 }
2702
2703 #undef __FUNCT__
2704 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2705 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2706 {
2707 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2708 PetscErrorCode ierr;
2709
2710 PetscFunctionBegin;
2711 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2712 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2713 PetscFunctionReturn(0);
2714 }
2715
2716 #undef __FUNCT__
2717 #define __FUNCT__ "MatMerge"
2718 /*@C
2719 MatMerge - Creates a single large PETSc matrix by concatenating the sequential
2720 matrices from each processor
2721
2722 Collective on MPI_Comm
2723
2724 Input Parameters:
2725 + comm - the communicator that the parallel matrix will live on
2726 . inmat - the input sequential matrix (one per process)
2727 . n - number of local columns (or PETSC_DECIDE)
2728 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2729
2730 Output Parameter:
2731 . outmat - the parallel matrix generated
2732
2733 Level: advanced
2734
2735 Notes: The number of columns of the matrix on EACH processor MUST be the same.
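   A usage sketch (an illustration added here; inmat is the sequential matrix
   each process contributes, and note that it is destroyed by this routine):
.vb
      Mat outmat;
      ierr = MatMerge(comm,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);CHKERRQ(ierr);
.ve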
2736 2737 @*/ 2738 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 2739 { 2740 PetscErrorCode ierr; 2741 int m,N,i,rstart,nnz,I,*dnz,*onz; 2742 const int *indx; 2743 const PetscScalar *values; 2744 PetscMap columnmap,rowmap; 2745 2746 PetscFunctionBegin; 2747 2748 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 2749 if (scall == MAT_INITIAL_MATRIX){ 2750 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2751 if (n == PETSC_DECIDE){ 2752 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2753 ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr); 2754 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2755 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2756 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2757 } 2758 2759 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2760 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2761 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2762 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2763 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2764 2765 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2766 for (i=0;i<m;i++) { 2767 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2768 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2769 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2770 } 2771 /* This routine will ONLY return MPIAIJ type matrix */ 2772 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr); 2773 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2774 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2775 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2776 2777 } else if (scall == MAT_REUSE_MATRIX){ 2778 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2779 } else { 2780 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall); 2781 } 2782 2783 for (i=0;i<m;i++) { 2784 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2785 I = i + rstart; 2786 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2787 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2788 } 2789 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2790 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2791 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2792 2793 PetscFunctionReturn(0); 2794 } 2795 2796 #undef __FUNCT__ 2797 #define __FUNCT__ "MatFileSplit" 2798 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2799 { 2800 PetscErrorCode ierr; 2801 PetscMPIInt rank; 2802 int m,N,i,rstart,nnz; 2803 size_t len; 2804 const int *indx; 2805 PetscViewer out; 2806 char *name; 2807 Mat B; 2808 const PetscScalar *values; 2809 2810 PetscFunctionBegin; 2811 2812 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2813 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2814 /* Should this be the type of the diagonal block of A? 
*/
2815 ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr);
2816 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2817 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2818 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2819 for (i=0;i<m;i++) {
2820 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2821 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2822 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2823 }
2824 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2825 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2826
2827 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2828 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2829 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2830 sprintf(name,"%s.%d",outfile,rank);
2831 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
2832 ierr = PetscFree(name);CHKERRQ(ierr);
2833 ierr = MatView(B,out);CHKERRQ(ierr);
2834 ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2835 ierr = MatDestroy(B);CHKERRQ(ierr);
2836 PetscFunctionReturn(0);
2837 }
2838
2839 typedef struct { /* used by MatMerge_SeqsToMPI for reusing the merged matrix */
2840 PetscMap rowmap;
2841 int nrecv,nsend,*id_r,*bi,*bj,**ijbuf_r;
2842 int *len_sra; /* array of length 2*size; len_sra[i]/len_sra[size+i]
2843 store the length of the i-th send/recv matrix values */
2844 MatScalar *abuf_s;
2845 } Mat_Merge_SeqsToMPI;
2846
2847 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2848 #undef __FUNCT__
2849 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2850 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2851 {
2852 PetscErrorCode ierr;
2853 Mat_Merge_SeqsToMPI *merge=(Mat_Merge_SeqsToMPI*)A->spptr;
2854
2855 PetscFunctionBegin;
2856 ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2857 ierr = PetscFree(merge->abuf_s);CHKERRQ(ierr);
2858 ierr = PetscFree(merge->len_sra);CHKERRQ(ierr);
2859 ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2860 ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2861 ierr = PetscFree(merge->ijbuf_r);CHKERRQ(ierr);
2862 ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr);
2863 ierr = PetscFree(merge);CHKERRQ(ierr);
2864
2865 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2866 PetscFunctionReturn(0);
2867 }
2868
2869 #include "src/mat/utils/freespace.h"
2870 #include "petscbt.h"
2871 #undef __FUNCT__
2872 #define __FUNCT__ "MatMerge_SeqsToMPI"
2873 /*@C
2874 MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding the sequential
2875 matrices from each processor
2876
2877 Collective on MPI_Comm
2878
2879 Input Parameters:
2880 + comm - the communicator that the parallel matrix will live on
2881 . seqmat - the input sequential matrix (one per process)
2882 . m - number of local rows (or PETSC_DECIDE)
2883 . n - number of local columns (or PETSC_DECIDE)
2884 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2885
2886 Output Parameter:
2887 . mpimat - the parallel matrix generated
2888
2889 Level: advanced
2890
2891 Notes: The dimensions of the sequential matrix on each processor
2892 MUST be the same.
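   A usage sketch (an illustration added here): each process passes its
   full-sized sequential matrix, and overlapping entries are summed into the
   parallel result
.vb
      Mat mpimat;
      ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
.ve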
2893 2894 @*/ 2895 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 2896 { 2897 PetscErrorCode ierr; 2898 Mat B_seq,B_mpi; 2899 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 2900 PetscMPIInt size,rank; 2901 int M=seqmat->m,N=seqmat->n,i,j,*owners,*ai=a->i,*aj=a->j; 2902 int tag,taga,len,len_a = 0,*len_s,*len_sa,proc,*len_r,*len_ra; 2903 int **ijbuf_r,*ijbuf_s,*nnz_ptr,k,anzi,*bj_i,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0,nextaj; 2904 MPI_Request *s_waits,*r_waits,*s_waitsa,*r_waitsa; 2905 MPI_Status *status; 2906 MatScalar *ba,*aa=a->a,**abuf_r,*abuf_i,*ba_i; 2907 FreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2908 PetscBT lnkbt; 2909 Mat_Merge_SeqsToMPI *merge; 2910 2911 PetscFunctionBegin; 2912 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2913 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2914 /* ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] nnz(C): %d\n",rank,ai[M]); */ 2915 if (scall == MAT_INITIAL_MATRIX){ 2916 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 2917 } else if (scall == MAT_REUSE_MATRIX){ 2918 B_mpi = *mpimat; 2919 merge = (Mat_Merge_SeqsToMPI*)B_mpi->spptr; 2920 bi = merge->bi; 2921 bj = merge->bj; 2922 ijbuf_r = merge->ijbuf_r; 2923 } else { 2924 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall); 2925 } 2926 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 2927 2928 /* determine the number of messages to send, their lengths */ 2929 /*---------------------------------------------------------*/ 2930 if (scall == MAT_INITIAL_MATRIX){ 2931 ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr); 2932 if (m == PETSC_DECIDE) { 2933 ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr); 2934 } else { 2935 ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 2936 } 2937 ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr); 2938 ierr = PetscMalloc(size*sizeof(int),&len_s);CHKERRQ(ierr); 2939 ierr = PetscMalloc(2*size*sizeof(int),&merge->len_sra);CHKERRQ(ierr); 2940 } 2941 if (m == PETSC_DECIDE) ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); 2942 ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr); 2943 2944 len_sa = merge->len_sra; 2945 len_ra = merge->len_sra + size; 2946 2947 if (scall == MAT_INITIAL_MATRIX){ 2948 merge->nsend = 0; 2949 len = len_a = 0; 2950 for (proc=0; proc<size; proc++){ 2951 len_s[proc] = len_sa[proc] = 0; 2952 if (proc == rank) continue; 2953 for (i=owners[proc]; i<owners[proc+1]; i++){ /* rows sent to [proc] */ 2954 len_sa[proc] += ai[i+1] - ai[i]; /* num of cols sent to [proc] */ 2955 } 2956 if (len_sa[proc]) { 2957 merge->nsend++; 2958 len_s[proc] = len_sa[proc] + owners[proc+1] - owners[proc]; 2959 if (len < len_s[proc]) len = len_s[proc]; 2960 if (len_a < len_sa[proc]) len_a = len_sa[proc]; 2961 } 2962 } 2963 2964 /* determine the number and length of messages to receive */ 2965 /*--------------------------------------------------------*/ 2966 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 2967 ierr = PetscGatherMessageLengths(comm,merge->nsend,merge->nrecv,len_s,&merge->id_r,&len_r);CHKERRQ(ierr); 2968 /* 2969 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] nsend: %d, nrecv: %d\n",rank,merge->nsend,merge->nrecv); 2970 for (i=0; i<merge->nrecv; i++){ 2971 ierr = PetscPrintf(PETSC_COMM_SELF," [%d] expects recv len_r=%d from [%d]\n",rank,len_r[i],merge->id_r[i]); 2972 } 2973 */ 2974 2975 /* post the Irecvs corresponding to these messages */ 2976 
/*-------------------------------------------------*/ 2977 /* get new tag to keep the communication clean */ 2978 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tag);CHKERRQ(ierr); 2979 ierr = PetscPostIrecvInt(comm,tag,merge->nrecv,merge->id_r,len_r,&ijbuf_r,&r_waits);CHKERRQ(ierr); 2980 2981 /* post the sends */ 2982 /*----------------*/ 2983 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 2984 ierr = PetscMalloc((len+1)*sizeof(int),&ijbuf_s);CHKERRQ(ierr); 2985 k = 0; 2986 for (proc=0; proc<size; proc++){ 2987 if (!len_s[proc]) continue; 2988 /* form outgoing ij data to be sent to [proc] */ 2989 nnz_ptr = ijbuf_s + owners[proc+1] - owners[proc]; 2990 for (i=owners[proc]; i<owners[proc+1]; i++){ /* rows sent to [proc] */ 2991 anzi = ai[i+1] - ai[i]; 2992 if (i==owners[proc]){ 2993 ijbuf_s[i-owners[proc]] = anzi; 2994 } else { 2995 ijbuf_s[i-owners[proc]] = ijbuf_s[i-owners[proc]-1]+ anzi; 2996 } 2997 for (j=0; j <anzi; j++){ 2998 *nnz_ptr = *(aj+ai[i]+j); nnz_ptr++; 2999 } 3000 } 3001 ierr = MPI_Isend(ijbuf_s,len_s[proc],MPI_INT,proc,tag,comm,s_waits+k);CHKERRQ(ierr); 3002 k++; 3003 } 3004 3005 /* receives and sends of ij-structure are complete, deallocate memories */ 3006 /*----------------------------------------------------------------------*/ 3007 ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr); 3008 ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr); 3009 3010 ierr = PetscFree(ijbuf_s);CHKERRQ(ierr); 3011 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3012 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3013 } 3014 3015 /* send and recv matrix values */ 3016 /*-----------------------------*/ 3017 if (scall == MAT_INITIAL_MATRIX){ 3018 ierr = PetscMalloc((len_a+1)*sizeof(MatScalar),&merge->abuf_s);CHKERRQ(ierr); 3019 for (i=0; i<merge->nrecv; i++) len_ra[i] = len_r[i]-m; /* length of seqmat->a to be received from a proc */ 3020 } 3021 ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr); 3022 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,len_ra,&abuf_r,&r_waitsa);CHKERRQ(ierr); 3023 3024 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waitsa);CHKERRQ(ierr); 3025 k = 0; 3026 for (proc=0; proc<size; proc++){ 3027 if (!len_sa[proc]) continue; 3028 /* form outgoing mat value data to be sent to [proc] */ 3029 abuf_i = merge->abuf_s; 3030 for (i=owners[proc]; i<owners[proc+1]; i++){ /* rows sent to [proc] */ 3031 anzi = ai[i+1] - ai[i]; 3032 for (j=0; j <anzi; j++){ 3033 *abuf_i = *(aa+ai[i]+j); abuf_i++; 3034 } 3035 } 3036 ierr = MPI_Isend(merge->abuf_s,len_sa[proc],MPIU_MATSCALAR,proc,taga,comm,s_waitsa+k);CHKERRQ(ierr); 3037 k++; 3038 } 3039 3040 ierr = MPI_Waitall(merge->nrecv,r_waitsa,status);CHKERRQ(ierr); 3041 ierr = MPI_Waitall(merge->nsend,s_waitsa,status);CHKERRQ(ierr); 3042 ierr = PetscFree(status);CHKERRQ(ierr); 3043 3044 ierr = PetscFree(s_waitsa);CHKERRQ(ierr); 3045 ierr = PetscFree(r_waitsa);CHKERRQ(ierr); 3046 3047 /* create seq matrix B_seq in each processor */ 3048 /*-------------------------------------------*/ 3049 if (scall == MAT_INITIAL_MATRIX){ 3050 ierr = PetscFree(len_s);CHKERRQ(ierr); 3051 /* allocate bi array and free space for accumulating nonzero column info */ 3052 ierr = PetscMalloc((m+1)*sizeof(int),&bi);CHKERRQ(ierr); 3053 bi[0] = 0; 3054 3055 /* create and initialize a linked list */ 3056 nlnk = N+1; 3057 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3058 3059 /* initial FreeSpace size is (nproc)*(num of local nnz(seqmat)) */ 3060 len = 0; 
3061 for (i=owners[rank]; i<owners[rank+1]; i++) len += ai[i+1] - ai[i];
3062 ierr = GetMoreSpace((int)(size*len+1),&free_space);CHKERRQ(ierr);
3063 current_space = free_space;
3064
3065 /* determine symbolic info for each row of B_seq */
3066 for (i=0;i<m;i++) {
3067 bnzi = 0;
3068 /* add local non-zero cols of this proc's seqmat into lnk */
3069 arow = owners[rank] + i;
3070 anzi = ai[arow+1] - ai[arow];
3071 aj = a->j + ai[arow];
3072 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3073 bnzi += nlnk;
3074 /* add received col data into lnk */
3075 for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3076 /* i-th row */
3077 if (i == 0){
3078 anzi = *(ijbuf_r[k]+i);
3079 aj = ijbuf_r[k] + m;
3080 } else {
3081 anzi = *(ijbuf_r[k]+i) - *(ijbuf_r[k]+i-1);
3082 aj = ijbuf_r[k]+m + *(ijbuf_r[k]+i-1);
3083 }
3084 ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3085 bnzi += nlnk;
3086 }
3087
3088 /* if free space is not available, make more free space */
3089 if (current_space->local_remaining<bnzi) {
3090 ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3091 nspacedouble++;
3092 }
3093 /* copy data into free space, then initialize lnk */
3094 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3095 current_space->array += bnzi;
3096 current_space->local_used += bnzi;
3097 current_space->local_remaining -= bnzi;
3098
3099 bi[i+1] = bi[i] + bnzi;
3100 }
3101 ierr = PetscMalloc((bi[m]+1)*sizeof(int),&bj);CHKERRQ(ierr);
3102 ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3103
3104 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3105 ierr = PetscFree(len_r);CHKERRQ(ierr);
3106
3107 /* allocate space for ba */
3108 ierr = PetscMalloc((bi[m]+1)*sizeof(MatScalar),&ba);CHKERRQ(ierr);
3109 ierr = PetscMemzero(ba,(bi[m]+1)*sizeof(MatScalar));CHKERRQ(ierr);
3110
3111 /* put together B_seq */
3112 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,bi,bj,ba,&B_seq);CHKERRQ(ierr);
3113 ierr = PetscFree(ba);CHKERRQ(ierr); /* bi and bj are saved for reuse */
3114
3115 /* create symbolic parallel matrix B_mpi by concatenating B_seq */
3116 /*--------------------------------------------------------------*/
3117 ierr = MatMerge(comm,B_seq,n,scall,&B_mpi);CHKERRQ(ierr);
3118 } /* end of if (scall == MAT_INITIAL_MATRIX) */
3119
3120 /* insert mat values of B_mpi */
3121 /*----------------------------*/
3122 ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
3123
3124 /* set values of ba */
3125 for (i=0; i<m; i++) {
3126 arow = owners[rank] + i;
3127 bj_i = bj+bi[i]; /* col indices of the i-th row of B_seq */
3128 bnzi = bi[i+1] - bi[i];
3129 ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
3130
3131 /* add local non-zero vals of this proc's seqmat into ba */
3132 anzi = ai[arow+1] - ai[arow];
3133 aj = a->j + ai[arow];
3134 aa = a->a + ai[arow];
3135 nextaj = 0;
3136 for (j=0; nextaj<anzi; j++){
3137 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3138 ba_i[j] += aa[nextaj++];
3139 }
3140 }
3141
3142 /* add received vals into ba */
3143 for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3144 /* i-th row */
3145 if (i == 0){
3146 anzi = *(ijbuf_r[k]+i);
3147 aj = ijbuf_r[k] + m;
3148 aa = abuf_r[k];
3149 } else {
3150 anzi = *(ijbuf_r[k]+i) - *(ijbuf_r[k]+i-1);
3151 aj = ijbuf_r[k]+m + *(ijbuf_r[k]+i-1);
3152 aa = abuf_r[k] + *(ijbuf_r[k]+i-1);
3153 }
3154 nextaj = 0;
3155 for (j=0; nextaj<anzi; j++){
3156 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3157 ba_i[j] += aa[nextaj++];
3158 } 3159 } 3160 } 3161 3162 ierr = MatSetValues(B_mpi,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 3163 } 3164 ierr = MatAssemblyBegin(B_mpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3165 ierr = MatAssemblyEnd(B_mpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3166 3167 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 3168 ierr = PetscFree(ba_i);CHKERRQ(ierr); 3169 3170 /* attach the supporting struct to B_mpi for reuse */ 3171 /*-------------------------------------------------*/ 3172 if (scall == MAT_INITIAL_MATRIX){ 3173 B_mpi->spptr = (void*)merge; 3174 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 3175 merge->bi = bi; 3176 merge->bj = bj; 3177 merge->ijbuf_r = ijbuf_r; 3178 3179 *mpimat = B_mpi; 3180 } 3181 PetscFunctionReturn(0); 3182 } 3183 3184 #undef __FUNCT__ 3185 #define __FUNCT__ "MatGetLocalMat" 3186 /*@C 3187 MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows 3188 3189 Collective on Mat 3190 3191 Input Parameters: 3192 + A - the matrix 3193 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3194 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 3195 3196 Output Parameter: 3197 . A_loc - the local sequential matrix generated 3198 3199 Level: developer 3200 3201 @*/ 3202 PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 3203 { 3204 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 3205 PetscErrorCode ierr; 3206 int *idx,i,start,end,ncols,nzA,nzB,*cmap,imark; 3207 IS isrowa,iscola; 3208 Mat *aloc; 3209 3210 PetscFunctionBegin; 3211 if (!row){ 3212 start = a->rstart; end = a->rend; 3213 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 3214 } else { 3215 isrowa = *row; 3216 } 3217 if (!col){ 3218 start = a->cstart; 3219 cmap = a->garray; 3220 nzA = a->A->n; 3221 nzB = a->B->n; 3222 ierr = PetscMalloc((nzA+nzB)*sizeof(int), &idx);CHKERRQ(ierr); 3223 ncols = 0; 3224 for (i=0; i<nzB; i++) { 3225 if (cmap[i] < start) idx[ncols++] = cmap[i]; 3226 else break; 3227 } 3228 imark = i; 3229 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 3230 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 3231 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr); 3232 ierr = PetscFree(idx);CHKERRQ(ierr); 3233 } else { 3234 iscola = *col; 3235 } 3236 if (scall != MAT_INITIAL_MATRIX){ 3237 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 3238 aloc[0] = *A_loc; 3239 } 3240 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 3241 *A_loc = aloc[0]; 3242 ierr = PetscFree(aloc);CHKERRQ(ierr); 3243 if (!row){ 3244 ierr = ISDestroy(isrowa);CHKERRQ(ierr); 3245 } 3246 if (!col){ 3247 ierr = ISDestroy(iscola);CHKERRQ(ierr); 3248 } 3249 PetscFunctionReturn(0); 3250 } 3251 3252 #undef __FUNCT__ 3253 #define __FUNCT__ "MatGetBrowsOfAcols" 3254 /*@C 3255 MatGetLocalMat - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero col of A 3256 3257 Collective on Mat 3258 3259 Input Parameters: 3260 + A,B - the matrices 3261 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 3262 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 3263 3264 Output Parameter: 3265 + rowb, colb - index sets of rows and columns of B to extract 3266 . 
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
   MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A

   Collective on Mat

   Input Parameters:
+  A,B - the matrices
.  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-  rowb, colb - index sets of the rows and columns of B to extract (or PETSC_NULL)

   Output Parameters:
+  rowb, colb - index sets of the extracted rows and columns of B
.  brstart - row index of B_seq from which the next B->m rows are taken from B's local rows
-  B_seq - the sequential matrix generated

   Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data,*b = (Mat_MPIAIJ*)B->data;
  PetscErrorCode ierr;
  int            *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq;

  PetscFunctionBegin;
  if (a->cstart != b->rstart || a->cend != b->rend){
    SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend);
  }

  if (scall == MAT_INITIAL_MATRIX){
    start = a->cstart;
    cmap  = a->garray;
    nzA   = a->A->n;
    nzB   = a->B->n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(int),&idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++)     idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
    *brstart = imark;
    ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb){
    ierr = ISDestroy(isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb){
    ierr = ISDestroy(iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  PetscFunctionReturn(0);
}
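
/*
   A minimal usage sketch (illustrative only, e.g. as a building block of a
   parallel product C = A*B): gather the rows of B matching A's nonzero
   columns, keeping the index sets so a later call can reuse the pattern.

     IS       rowb,colb;
     PetscInt brstart;
     Mat      B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(rowb);CHKERRQ(ierr);
     ierr = ISDestroy(colb);CHKERRQ(ierr);
     ierr = MatDestroy(B_seq);CHKERRQ(ierr);
*/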