#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/inline/spops.h"

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable,
  at a slightly higher hash-table cost; without it, it is not scalable (each
  processor has an order-N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        n = aij->B->n,i,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
  rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
  rmax = aimax[row]; nrow = ailen[row]; \
  col1 = col - shift; \
 \
  low = 0; high = nrow; \
  while (high-low > 5) { \
    t = (low+high)/2; \
    if (rp[t] > col) high = t; \
    else             low  = t; \
  } \
  for (_i=low; _i<high; _i++) { \
    if (rp[_i] > col1) break; \
    if (rp[_i] == col1) { \
      if (addv == ADD_VALUES) ap[_i] += value; \
      else                    ap[_i] = value; \
      goto a_noinsert; \
    } \
  } \
  if (nonew == 1) goto a_noinsert; \
  else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
  if (nrow >= rmax) { \
    /* there is no extra room in row, therefore enlarge */ \
    int         new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
    PetscScalar *new_a; \
 \
    if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
 \
    /* malloc new storage space */ \
    len   = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
    ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
    new_j = (int*)(new_a + new_nz); \
    new_i = new_j + new_nz; \
 \
    /* copy over old data into new slots */ \
    for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
    for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
    ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
    len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
    ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                       len*sizeof(int));CHKERRQ(ierr); \
    ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
    ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                       len*sizeof(PetscScalar));CHKERRQ(ierr); \
    /* free up old matrix storage */ \
 \
    ierr = PetscFree(a->a);CHKERRQ(ierr); \
    if (!a->singlemalloc) { \
      ierr = PetscFree(a->i);CHKERRQ(ierr); \
      ierr = PetscFree(a->j);CHKERRQ(ierr); \
    } \
    aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
    a->singlemalloc = PETSC_TRUE; \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
    PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
94 a->maxnz += CHUNKSIZE; \ 95 a->reallocs++; \ 96 } \ 97 N = nrow++ - 1; a->nz++; \ 98 /* shift up all the later entries in this row */ \ 99 for (ii=N; ii>=_i; ii--) { \ 100 rp[ii+1] = rp[ii]; \ 101 ap[ii+1] = ap[ii]; \ 102 } \ 103 rp[_i] = col1; \ 104 ap[_i] = value; \ 105 a_noinsert: ; \ 106 ailen[row] = nrow; \ 107 } 108 109 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 110 { \ 111 \ 112 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 113 rmax = bimax[row]; nrow = bilen[row]; \ 114 col1 = col - shift; \ 115 \ 116 low = 0; high = nrow; \ 117 while (high-low > 5) { \ 118 t = (low+high)/2; \ 119 if (rp[t] > col) high = t; \ 120 else low = t; \ 121 } \ 122 for (_i=low; _i<high; _i++) { \ 123 if (rp[_i] > col1) break; \ 124 if (rp[_i] == col1) { \ 125 if (addv == ADD_VALUES) ap[_i] += value; \ 126 else ap[_i] = value; \ 127 goto b_noinsert; \ 128 } \ 129 } \ 130 if (nonew == 1) goto b_noinsert; \ 131 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \ 132 if (nrow >= rmax) { \ 133 /* there is no extra room in row, therefore enlarge */ \ 134 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 135 PetscScalar *new_a; \ 136 \ 137 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \ 138 \ 139 /* malloc new storage space */ \ 140 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \ 141 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 142 new_j = (int*)(new_a + new_nz); \ 143 new_i = new_j + new_nz; \ 144 \ 145 /* copy over old data into new slots */ \ 146 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 147 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 148 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 149 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 150 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 151 len*sizeof(int));CHKERRQ(ierr); \ 152 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 153 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 154 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 155 /* free up old matrix storage */ \ 156 \ 157 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 158 if (!b->singlemalloc) { \ 159 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 160 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 161 } \ 162 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 163 b->singlemalloc = PETSC_TRUE; \ 164 \ 165 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 166 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 167 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 168 b->maxnz += CHUNKSIZE; \ 169 b->reallocs++; \ 170 } \ 171 N = nrow++ - 1; b->nz++; \ 172 /* shift up all the later entries in this row */ \ 173 for (ii=N; ii>=_i; ii--) { \ 174 rp[ii+1] = rp[ii]; \ 175 ap[ii+1] = ap[ii]; \ 176 } \ 177 rp[_i] = col1; \ 178 ap[_i] = value; \ 179 b_noinsert: ; \ 180 bilen[row] = nrow; \ 181 } 182 183 #undef __FUNCT__ 184 #define __FUNCT__ "MatSetValues_MPIAIJ" 185 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 186 { 187 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 188 PetscScalar value; 189 PetscErrorCode ierr; 190 int i,j,rstart = aij->rstart,rend = aij->rend; 191 int cstart = aij->cstart,cend = aij->cend,row,col; 192 PetscTruth roworiented = aij->roworiented; 193 194 /* Some Variables 
required in the macro */ 195 Mat A = aij->A; 196 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 197 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 198 PetscScalar *aa = a->a; 199 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 200 Mat B = aij->B; 201 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 202 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 203 PetscScalar *ba = b->a; 204 205 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 206 int nonew = a->nonew,shift=0; 207 PetscScalar *ap; 208 209 PetscFunctionBegin; 210 for (i=0; i<m; i++) { 211 if (im[i] < 0) continue; 212 #if defined(PETSC_USE_BOPT_g) 213 if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1); 214 #endif 215 if (im[i] >= rstart && im[i] < rend) { 216 row = im[i] - rstart; 217 for (j=0; j<n; j++) { 218 if (in[j] >= cstart && in[j] < cend){ 219 col = in[j] - cstart; 220 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 221 if (ignorezeroentries && value == 0.0) continue; 222 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 223 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 224 } else if (in[j] < 0) continue; 225 #if defined(PETSC_USE_BOPT_g) 226 else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);} 227 #endif 228 else { 229 if (mat->was_assembled) { 230 if (!aij->colmap) { 231 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 232 } 233 #if defined (PETSC_USE_CTABLE) 234 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 235 col--; 236 #else 237 col = aij->colmap[in[j]] - 1; 238 #endif 239 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 240 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 241 col = in[j]; 242 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 243 B = aij->B; 244 b = (Mat_SeqAIJ*)B->data; 245 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 246 ba = b->a; 247 } 248 } else col = in[j]; 249 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 250 if (ignorezeroentries && value == 0.0) continue; 251 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 252 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 253 } 254 } 255 } else { 256 if (!aij->donotstash) { 257 if (roworiented) { 258 if (ignorezeroentries && v[i*n] == 0.0) continue; 259 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 260 } else { 261 if (ignorezeroentries && v[i] == 0.0) continue; 262 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 263 } 264 } 265 } 266 } 267 PetscFunctionReturn(0); 268 } 269 270 #undef __FUNCT__ 271 #define __FUNCT__ "MatGetValues_MPIAIJ" 272 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[]) 273 { 274 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 275 PetscErrorCode ierr; 276 int i,j,rstart = aij->rstart,rend = aij->rend; 277 int cstart = aij->cstart,cend = aij->cend,row,col; 278 279 PetscFunctionBegin; 280 for (i=0; i<m; i++) { 281 if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]); 282 if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1); 283 if (idxm[i] >= rstart && idxm[i] < rend) { 284 row = idxm[i] - rstart; 285 for (j=0; j<n; j++) { 286 if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: 
%d",idxn[j]); 287 if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1); 288 if (idxn[j] >= cstart && idxn[j] < cend){ 289 col = idxn[j] - cstart; 290 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 291 } else { 292 if (!aij->colmap) { 293 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 294 } 295 #if defined (PETSC_USE_CTABLE) 296 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 297 col --; 298 #else 299 col = aij->colmap[idxn[j]] - 1; 300 #endif 301 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 302 else { 303 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 304 } 305 } 306 } 307 } else { 308 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 309 } 310 } 311 PetscFunctionReturn(0); 312 } 313 314 #undef __FUNCT__ 315 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 316 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 317 { 318 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 319 PetscErrorCode ierr; 320 int nstash,reallocs; 321 InsertMode addv; 322 323 PetscFunctionBegin; 324 if (aij->donotstash) { 325 PetscFunctionReturn(0); 326 } 327 328 /* make sure all processors are either in INSERTMODE or ADDMODE */ 329 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 330 if (addv == (ADD_VALUES|INSERT_VALUES)) { 331 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 332 } 333 mat->insertmode = addv; /* in case this processor had no cache */ 334 335 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 336 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 337 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 338 PetscFunctionReturn(0); 339 } 340 341 342 #undef __FUNCT__ 343 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 344 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 345 { 346 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 347 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data; 348 int i,j,rstart,ncols,n,ierr,flg; 349 int *row,*col,other_disassembled; 350 PetscScalar *val; 351 InsertMode addv = mat->insertmode; 352 353 PetscFunctionBegin; 354 if (!aij->donotstash) { 355 while (1) { 356 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 357 if (!flg) break; 358 359 for (i=0; i<n;) { 360 /* Now identify the consecutive vals belonging to the same row */ 361 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 362 if (j < n) ncols = j-i; 363 else ncols = n-i; 364 /* Now assemble all these values with a single function call */ 365 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 366 i = j; 367 } 368 } 369 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 370 } 371 372 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 373 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 374 375 /* determine if any processor has disassembled, if so we must 376 also disassemble ourselfs, in order that we may reassemble. 
*/ 377 /* 378 if nonzero structure of submatrix B cannot change then we know that 379 no processor disassembled thus we can skip this stuff 380 */ 381 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 382 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 383 if (mat->was_assembled && !other_disassembled) { 384 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 385 } 386 } 387 388 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 389 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 390 } 391 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 392 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 393 394 if (aij->rowvalues) { 395 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 396 aij->rowvalues = 0; 397 } 398 399 /* used by MatAXPY() */ 400 a->xtoy = 0; b->xtoy = 0; 401 a->XtoY = 0; b->XtoY = 0; 402 403 PetscFunctionReturn(0); 404 } 405 406 #undef __FUNCT__ 407 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 408 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 409 { 410 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 411 PetscErrorCode ierr; 412 413 PetscFunctionBegin; 414 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 415 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 416 PetscFunctionReturn(0); 417 } 418 419 #undef __FUNCT__ 420 #define __FUNCT__ "MatZeroRows_MPIAIJ" 421 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag) 422 { 423 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 424 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 425 int *nprocs,j,idx,nsends,row; 426 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 427 int *rvalues,tag = A->tag,count,base,slen,n,*source; 428 int *lens,imdex,*lrows,*values,rstart=l->rstart; 429 MPI_Comm comm = A->comm; 430 MPI_Request *send_waits,*recv_waits; 431 MPI_Status recv_status,*send_status; 432 IS istmp; 433 PetscTruth found; 434 435 PetscFunctionBegin; 436 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 437 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 438 439 /* first count number of contributors to each processor */ 440 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 441 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 442 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 443 for (i=0; i<N; i++) { 444 idx = rows[i]; 445 found = PETSC_FALSE; 446 for (j=0; j<size; j++) { 447 if (idx >= owners[j] && idx < owners[j+1]) { 448 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 449 } 450 } 451 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 452 } 453 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 454 455 /* inform other processors of number of messages and max length*/ 456 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 457 458 /* post receives: */ 459 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 460 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 461 for (i=0; i<nrecvs; i++) { 462 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 463 } 464 465 /* do sends: 466 1) starts[i] gives the starting index in svalues for stuff going to 467 the ith processor 468 */ 469 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 470 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 471 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 472 starts[0] = 0; 473 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 474 for (i=0; i<N; i++) 
{
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     allocating new memory, thus calling lots of mallocs and slowing things down.
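
     For example, a caller that wants the zeroed rows to keep a unit diagonal
     would typically do something like the following (illustrative sketch only,
     not part of this file; "rows" is assumed to be an IS of global row numbers):

        PetscScalar one = 1.0;
        ierr = MatZeroRows(A,rows,&one);CHKERRQ(ierr);

     Passing 0 for the diagonal pointer simply zeros the rows.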
529 530 Contributed by: Mathew Knepley 531 */ 532 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 533 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 534 if (diag && (l->A->M == l->A->N)) { 535 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 536 } else if (diag) { 537 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 538 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 539 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 540 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 541 } 542 for (i = 0; i < slen; i++) { 543 row = lrows[i] + rstart; 544 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 545 } 546 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 547 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 548 } else { 549 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 550 } 551 ierr = ISDestroy(istmp);CHKERRQ(ierr); 552 ierr = PetscFree(lrows);CHKERRQ(ierr); 553 554 /* wait on sends */ 555 if (nsends) { 556 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 557 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 558 ierr = PetscFree(send_status);CHKERRQ(ierr); 559 } 560 ierr = PetscFree(send_waits);CHKERRQ(ierr); 561 ierr = PetscFree(svalues);CHKERRQ(ierr); 562 563 PetscFunctionReturn(0); 564 } 565 566 #undef __FUNCT__ 567 #define __FUNCT__ "MatMult_MPIAIJ" 568 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 569 { 570 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 571 PetscErrorCode ierr; 572 int nt; 573 574 PetscFunctionBegin; 575 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 576 if (nt != A->n) { 577 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt); 578 } 579 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 580 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 581 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 582 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 583 PetscFunctionReturn(0); 584 } 585 586 #undef __FUNCT__ 587 #define __FUNCT__ "MatMultAdd_MPIAIJ" 588 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 589 { 590 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 591 PetscErrorCode ierr; 592 593 PetscFunctionBegin; 594 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 595 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 596 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 597 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 598 PetscFunctionReturn(0); 599 } 600 601 #undef __FUNCT__ 602 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 603 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 604 { 605 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 606 PetscErrorCode ierr; 607 608 PetscFunctionBegin; 609 /* do nondiagonal part */ 610 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 611 /* send it on its way */ 612 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 613 /* do local part */ 614 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 615 /* receive remote parts: note this assumes the values are not actually */ 616 /* inserted in yy until the next line, which is true for my implementation*/ 617 /* but is not perhaps always true. 
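
     In other words, the intent is (sketch only, with A = local diagonal block,
     B = local off-diagonal block):

        lvec = B^T * xx                   started first, then sent with the scatter
        yy   = A^T * xx                   local work, overlapped with communication
        yy  += reverse-scatter (add) of lvec contributions from other processes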
*/ 618 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 619 PetscFunctionReturn(0); 620 } 621 622 EXTERN_C_BEGIN 623 #undef __FUNCT__ 624 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 625 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth tol,PetscTruth *f) 626 { 627 MPI_Comm comm; 628 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 629 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 630 IS Me,Notme; 631 int M,N,first,last,*notme,ntids,i, ierr; 632 633 PetscFunctionBegin; 634 635 /* Easy test: symmetric diagonal block */ 636 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 637 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 638 if (!*f) PetscFunctionReturn(0); 639 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 640 ierr = MPI_Comm_size(comm,&ntids);CHKERRQ(ierr); 641 if (ntids==1) PetscFunctionReturn(0); 642 643 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 644 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 645 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 646 ierr = PetscMalloc((N-last+first)*sizeof(int),¬me);CHKERRQ(ierr); 647 for (i=0; i<first; i++) notme[i] = i; 648 for (i=last; i<M; i++) notme[i-last+first] = i; 649 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr); 650 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 651 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 652 Aoff = Aoffs[0]; 653 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 654 Boff = Boffs[0]; 655 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 656 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 657 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 658 ierr = ISDestroy(Me);CHKERRQ(ierr); 659 ierr = ISDestroy(Notme);CHKERRQ(ierr); 660 661 PetscFunctionReturn(0); 662 } 663 EXTERN_C_END 664 665 #undef __FUNCT__ 666 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 667 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 668 { 669 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 670 PetscErrorCode ierr; 671 672 PetscFunctionBegin; 673 /* do nondiagonal part */ 674 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 675 /* send it on its way */ 676 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 677 /* do local part */ 678 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 679 /* receive remote parts: note this assumes the values are not actually */ 680 /* inserted in yy until the next line, which is true for my implementation*/ 681 /* but is not perhaps always true. 
*/ 682 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 683 PetscFunctionReturn(0); 684 } 685 686 /* 687 This only works correctly for square matrices where the subblock A->A is the 688 diagonal block 689 */ 690 #undef __FUNCT__ 691 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 692 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 693 { 694 PetscErrorCode ierr; 695 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 696 697 PetscFunctionBegin; 698 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 699 if (a->rstart != a->cstart || a->rend != a->cend) { 700 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 701 } 702 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 703 PetscFunctionReturn(0); 704 } 705 706 #undef __FUNCT__ 707 #define __FUNCT__ "MatScale_MPIAIJ" 708 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A) 709 { 710 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 711 PetscErrorCode ierr; 712 713 PetscFunctionBegin; 714 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 715 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 716 PetscFunctionReturn(0); 717 } 718 719 #undef __FUNCT__ 720 #define __FUNCT__ "MatDestroy_MPIAIJ" 721 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 722 { 723 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 724 PetscErrorCode ierr; 725 726 PetscFunctionBegin; 727 #if defined(PETSC_USE_LOG) 728 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 729 #endif 730 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 731 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 732 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 733 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 734 #if defined (PETSC_USE_CTABLE) 735 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 736 #else 737 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 738 #endif 739 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 740 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 741 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 742 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 743 ierr = PetscFree(aij);CHKERRQ(ierr); 744 PetscFunctionReturn(0); 745 } 746 747 #undef __FUNCT__ 748 #define __FUNCT__ "MatView_MPIAIJ_Binary" 749 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 750 { 751 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 752 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 753 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 754 int nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag; 755 int nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz; 756 PetscScalar *column_values; 757 758 PetscFunctionBegin; 759 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 760 ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr); 761 nz = A->nz + B->nz; 762 if (!rank) { 763 header[0] = MAT_FILE_COOKIE; 764 header[1] = mat->M; 765 header[2] = mat->N; 766 ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 767 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 768 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr); 769 /* get largest number of rows any processor has */ 770 rlen = mat->m; 771 ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr); 772 for (i=1; i<size; i++) { 773 rlen = PetscMax(rlen,range[i+1] - range[i]); 774 } 775 } else { 776 ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr); 777 rlen = mat->m; 
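    /*
       Sketch of the binary layout produced by the writes in this routine
       (summarized here for orientation only):

         header[4]      - {MAT_FILE_COOKIE, M, N, global number of nonzeros}
         row lengths    - one int per row, all rows of process 0, then process 1, ...
         column indices - global column numbers, row by row: the off-diagonal
                          entries left of cstart, then the diagonal-block entries
                          (shifted by cstart), then the remaining off-diagonal entries
         values         - the matching numerical values in the same order
    */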
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr =
PetscFree(column_values);CHKERRQ(ierr); 867 PetscFunctionReturn(0); 868 } 869 870 #undef __FUNCT__ 871 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 872 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 873 { 874 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 875 PetscErrorCode ierr; 876 int rank = aij->rank,size = aij->size; 877 PetscTruth isdraw,iascii,flg,isbinary; 878 PetscViewer sviewer; 879 PetscViewerFormat format; 880 881 PetscFunctionBegin; 882 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 883 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 884 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 885 if (iascii) { 886 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 887 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 888 MatInfo info; 889 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 890 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 891 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 892 if (flg) { 893 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 894 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 895 } else { 896 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 897 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 898 } 899 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 900 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 901 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 902 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 903 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 904 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 905 PetscFunctionReturn(0); 906 } else if (format == PETSC_VIEWER_ASCII_INFO) { 907 PetscFunctionReturn(0); 908 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 909 PetscFunctionReturn(0); 910 } 911 } else if (isbinary) { 912 if (size == 1) { 913 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 914 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 915 } else { 916 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 917 } 918 PetscFunctionReturn(0); 919 } else if (isdraw) { 920 PetscDraw draw; 921 PetscTruth isnull; 922 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 923 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 924 } 925 926 if (size == 1) { 927 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 928 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 929 } else { 930 /* assemble the entire matrix onto first processor. 
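
        A temporary MATMPIAIJ matrix is created whose local size is M x N on
        process 0 and 0 x 0 everywhere else; each process inserts its diagonal
        block (columns shifted by cstart) and its off-diagonal block (columns
        mapped through garray) with MatSetValues(), and after assembly the
        sequential block now owned by process 0 is viewed through the
        singleton viewer.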
*/ 931 Mat A; 932 Mat_SeqAIJ *Aloc; 933 int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 934 PetscScalar *a; 935 936 if (!rank) { 937 ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr); 938 } else { 939 ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr); 940 } 941 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 942 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 943 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 944 PetscLogObjectParent(mat,A); 945 946 /* copy over the A part */ 947 Aloc = (Mat_SeqAIJ*)aij->A->data; 948 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 949 row = aij->rstart; 950 for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;} 951 for (i=0; i<m; i++) { 952 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 953 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 954 } 955 aj = Aloc->j; 956 for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;} 957 958 /* copy over the B part */ 959 Aloc = (Mat_SeqAIJ*)aij->B->data; 960 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 961 row = aij->rstart; 962 ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr); 963 ct = cols; 964 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 965 for (i=0; i<m; i++) { 966 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 967 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 968 } 969 ierr = PetscFree(ct);CHKERRQ(ierr); 970 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 971 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 972 /* 973 Everyone has to call to draw the matrix since the graphics waits are 974 synchronized across all processors that share the PetscDraw object 975 */ 976 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 977 if (!rank) { 978 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 979 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 980 } 981 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 982 ierr = MatDestroy(A);CHKERRQ(ierr); 983 } 984 PetscFunctionReturn(0); 985 } 986 987 #undef __FUNCT__ 988 #define __FUNCT__ "MatView_MPIAIJ" 989 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 990 { 991 PetscErrorCode ierr; 992 PetscTruth iascii,isdraw,issocket,isbinary; 993 994 PetscFunctionBegin; 995 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 996 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 997 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 998 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 999 if (iascii || isdraw || isbinary || issocket) { 1000 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1001 } else { 1002 SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1003 } 1004 PetscFunctionReturn(0); 1005 } 1006 1007 1008 1009 #undef __FUNCT__ 1010 #define __FUNCT__ "MatRelax_MPIAIJ" 1011 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1012 { 1013 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1014 PetscErrorCode ierr; 1015 Vec bb1; 1016 PetscScalar mone=-1.0; 1017 1018 PetscFunctionBegin; 1019 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 
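  /*
     The "local" SOR variants below all follow the same pattern (descriptive
     summary of the code that follows): for each outer iteration the ghost
     values of xx are gathered into lvec, the right-hand side is corrected by

        bb1 = bb - B*lvec        (lvec is scaled by -1, then the multadd is used)

     and one sequential relaxation sweep is applied to the diagonal block A
     with bb1 as right-hand side.  Only the purely local sweeps are supported;
     a true parallel SOR is refused at the end of the routine.
  */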
1020 1021 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1022 1023 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1024 if (flag & SOR_ZERO_INITIAL_GUESS) { 1025 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1026 its--; 1027 } 1028 1029 while (its--) { 1030 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1031 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1032 1033 /* update rhs: bb1 = bb - B*x */ 1034 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1035 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1036 1037 /* local sweep */ 1038 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1039 CHKERRQ(ierr); 1040 } 1041 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1042 if (flag & SOR_ZERO_INITIAL_GUESS) { 1043 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1044 its--; 1045 } 1046 while (its--) { 1047 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1048 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1049 1050 /* update rhs: bb1 = bb - B*x */ 1051 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1052 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1053 1054 /* local sweep */ 1055 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1056 CHKERRQ(ierr); 1057 } 1058 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1059 if (flag & SOR_ZERO_INITIAL_GUESS) { 1060 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1061 its--; 1062 } 1063 while (its--) { 1064 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1065 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1066 1067 /* update rhs: bb1 = bb - B*x */ 1068 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1069 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1070 1071 /* local sweep */ 1072 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1073 CHKERRQ(ierr); 1074 } 1075 } else { 1076 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1077 } 1078 1079 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1080 PetscFunctionReturn(0); 1081 } 1082 1083 #undef __FUNCT__ 1084 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1085 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1086 { 1087 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1088 Mat A = mat->A,B = mat->B; 1089 PetscErrorCode ierr; 1090 PetscReal isend[5],irecv[5]; 1091 1092 PetscFunctionBegin; 1093 info->block_size = 1.0; 1094 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1095 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1096 isend[3] = info->memory; isend[4] = info->mallocs; 1097 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1098 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1099 isend[3] += info->memory; isend[4] += info->mallocs; 1100 if (flag == MAT_LOCAL) { 1101 info->nz_used = isend[0]; 1102 info->nz_allocated = isend[1]; 1103 info->nz_unneeded = isend[2]; 1104 info->memory = isend[3]; 1105 info->mallocs = isend[4]; 1106 } else if (flag == MAT_GLOBAL_MAX) { 1107 ierr = 
MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1108 info->nz_used = irecv[0]; 1109 info->nz_allocated = irecv[1]; 1110 info->nz_unneeded = irecv[2]; 1111 info->memory = irecv[3]; 1112 info->mallocs = irecv[4]; 1113 } else if (flag == MAT_GLOBAL_SUM) { 1114 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1115 info->nz_used = irecv[0]; 1116 info->nz_allocated = irecv[1]; 1117 info->nz_unneeded = irecv[2]; 1118 info->memory = irecv[3]; 1119 info->mallocs = irecv[4]; 1120 } 1121 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1122 info->fill_ratio_needed = 0; 1123 info->factor_mallocs = 0; 1124 info->rows_global = (double)matin->M; 1125 info->columns_global = (double)matin->N; 1126 info->rows_local = (double)matin->m; 1127 info->columns_local = (double)matin->N; 1128 1129 PetscFunctionReturn(0); 1130 } 1131 1132 #undef __FUNCT__ 1133 #define __FUNCT__ "MatSetOption_MPIAIJ" 1134 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op) 1135 { 1136 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1137 PetscErrorCode ierr; 1138 1139 PetscFunctionBegin; 1140 switch (op) { 1141 case MAT_NO_NEW_NONZERO_LOCATIONS: 1142 case MAT_YES_NEW_NONZERO_LOCATIONS: 1143 case MAT_COLUMNS_UNSORTED: 1144 case MAT_COLUMNS_SORTED: 1145 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1146 case MAT_KEEP_ZEROED_ROWS: 1147 case MAT_NEW_NONZERO_LOCATION_ERR: 1148 case MAT_USE_INODES: 1149 case MAT_DO_NOT_USE_INODES: 1150 case MAT_IGNORE_ZERO_ENTRIES: 1151 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1152 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1153 break; 1154 case MAT_ROW_ORIENTED: 1155 a->roworiented = PETSC_TRUE; 1156 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1157 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1158 break; 1159 case MAT_ROWS_SORTED: 1160 case MAT_ROWS_UNSORTED: 1161 case MAT_YES_NEW_DIAGONALS: 1162 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1163 break; 1164 case MAT_COLUMN_ORIENTED: 1165 a->roworiented = PETSC_FALSE; 1166 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1167 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1168 break; 1169 case MAT_IGNORE_OFF_PROC_ENTRIES: 1170 a->donotstash = PETSC_TRUE; 1171 break; 1172 case MAT_NO_NEW_DIAGONALS: 1173 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1174 case MAT_SYMMETRIC: 1175 case MAT_STRUCTURALLY_SYMMETRIC: 1176 case MAT_HERMITIAN: 1177 case MAT_SYMMETRY_ETERNAL: 1178 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1179 break; 1180 case MAT_NOT_SYMMETRIC: 1181 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 1182 case MAT_NOT_HERMITIAN: 1183 case MAT_NOT_SYMMETRY_ETERNAL: 1184 break; 1185 default: 1186 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1187 } 1188 PetscFunctionReturn(0); 1189 } 1190 1191 #undef __FUNCT__ 1192 #define __FUNCT__ "MatGetRow_MPIAIJ" 1193 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1194 { 1195 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1196 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1197 int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1198 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1199 int *cmap,*idx_p; 1200 1201 PetscFunctionBegin; 1202 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1203 mat->getrowactive = PETSC_TRUE; 1204 1205 if (!mat->rowvalues && (idx || v)) { 1206 /* 1207 allocate enough space to hold information from the longest row. 
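
        The required length is the maximum over the local rows i of
           (Aa->i[i+1] - Aa->i[i]) + (Ba->i[i+1] - Ba->i[i]),
        i.e. the combined number of nonzeros in the diagonal and off-diagonal
        parts of the row; a single allocation holds that many PetscScalars
        followed by that many ints (rowvalues, then rowindices).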
1208 */ 1209 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1210 int max = 1,tmp; 1211 for (i=0; i<matin->m; i++) { 1212 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1213 if (max < tmp) { max = tmp; } 1214 } 1215 ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1216 mat->rowindices = (int*)(mat->rowvalues + max); 1217 } 1218 1219 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1220 lrow = row - rstart; 1221 1222 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1223 if (!v) {pvA = 0; pvB = 0;} 1224 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1225 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1226 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1227 nztot = nzA + nzB; 1228 1229 cmap = mat->garray; 1230 if (v || idx) { 1231 if (nztot) { 1232 /* Sort by increasing column numbers, assuming A and B already sorted */ 1233 int imark = -1; 1234 if (v) { 1235 *v = v_p = mat->rowvalues; 1236 for (i=0; i<nzB; i++) { 1237 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1238 else break; 1239 } 1240 imark = i; 1241 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1242 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1243 } 1244 if (idx) { 1245 *idx = idx_p = mat->rowindices; 1246 if (imark > -1) { 1247 for (i=0; i<imark; i++) { 1248 idx_p[i] = cmap[cworkB[i]]; 1249 } 1250 } else { 1251 for (i=0; i<nzB; i++) { 1252 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1253 else break; 1254 } 1255 imark = i; 1256 } 1257 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1258 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1259 } 1260 } else { 1261 if (idx) *idx = 0; 1262 if (v) *v = 0; 1263 } 1264 } 1265 *nz = nztot; 1266 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1267 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1268 PetscFunctionReturn(0); 1269 } 1270 1271 #undef __FUNCT__ 1272 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1273 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1274 { 1275 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1276 1277 PetscFunctionBegin; 1278 if (aij->getrowactive == PETSC_FALSE) { 1279 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1280 } 1281 aij->getrowactive = PETSC_FALSE; 1282 PetscFunctionReturn(0); 1283 } 1284 1285 #undef __FUNCT__ 1286 #define __FUNCT__ "MatNorm_MPIAIJ" 1287 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1288 { 1289 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1290 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1291 PetscErrorCode ierr; 1292 int i,j,cstart = aij->cstart; 1293 PetscReal sum = 0.0; 1294 PetscScalar *v; 1295 1296 PetscFunctionBegin; 1297 if (aij->size == 1) { 1298 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1299 } else { 1300 if (type == NORM_FROBENIUS) { 1301 v = amat->a; 1302 for (i=0; i<amat->nz; i++) { 1303 #if defined(PETSC_USE_COMPLEX) 1304 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1305 #else 1306 sum += (*v)*(*v); v++; 1307 #endif 1308 } 1309 v = bmat->a; 1310 for (i=0; i<bmat->nz; i++) { 1311 #if defined(PETSC_USE_COMPLEX) 1312 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1313 #else 1314 sum += (*v)*(*v); v++; 1315 #endif 1316 } 1317 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1318 *norm = sqrt(*norm); 1319 } else if (type == NORM_1) { /* max column 
norm */ 1320 PetscReal *tmp,*tmp2; 1321 int *jj,*garray = aij->garray; 1322 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1323 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1324 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1325 *norm = 0.0; 1326 v = amat->a; jj = amat->j; 1327 for (j=0; j<amat->nz; j++) { 1328 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1329 } 1330 v = bmat->a; jj = bmat->j; 1331 for (j=0; j<bmat->nz; j++) { 1332 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1333 } 1334 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1335 for (j=0; j<mat->N; j++) { 1336 if (tmp2[j] > *norm) *norm = tmp2[j]; 1337 } 1338 ierr = PetscFree(tmp);CHKERRQ(ierr); 1339 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1340 } else if (type == NORM_INFINITY) { /* max row norm */ 1341 PetscReal ntemp = 0.0; 1342 for (j=0; j<aij->A->m; j++) { 1343 v = amat->a + amat->i[j]; 1344 sum = 0.0; 1345 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1346 sum += PetscAbsScalar(*v); v++; 1347 } 1348 v = bmat->a + bmat->i[j]; 1349 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1350 sum += PetscAbsScalar(*v); v++; 1351 } 1352 if (sum > ntemp) ntemp = sum; 1353 } 1354 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1355 } else { 1356 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1357 } 1358 } 1359 PetscFunctionReturn(0); 1360 } 1361 1362 #undef __FUNCT__ 1363 #define __FUNCT__ "MatTranspose_MPIAIJ" 1364 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout) 1365 { 1366 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1367 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1368 PetscErrorCode ierr; 1369 int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1370 Mat B; 1371 PetscScalar *array; 1372 1373 PetscFunctionBegin; 1374 if (!matout && M != N) { 1375 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1376 } 1377 1378 ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr); 1379 ierr = MatSetType(B,A->type_name);CHKERRQ(ierr); 1380 ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1381 1382 /* copy over the A part */ 1383 Aloc = (Mat_SeqAIJ*)a->A->data; 1384 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1385 row = a->rstart; 1386 for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;} 1387 for (i=0; i<m; i++) { 1388 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1389 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1390 } 1391 aj = Aloc->j; 1392 for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;} 1393 1394 /* copy over the B part */ 1395 Aloc = (Mat_SeqAIJ*)a->B->data; 1396 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1397 row = a->rstart; 1398 ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr); 1399 ct = cols; 1400 for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];} 1401 for (i=0; i<m; i++) { 1402 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1403 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1404 } 1405 ierr = PetscFree(ct);CHKERRQ(ierr); 1406 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1407 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1408 if (matout) { 1409 *matout = B; 1410 } else { 1411 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1412 } 1413 PetscFunctionReturn(0); 1414 } 1415 1416 #undef __FUNCT__ 1417 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1418 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1419 { 1420 Mat_MPIAIJ 
*aij = (Mat_MPIAIJ*)mat->data; 1421 Mat a = aij->A,b = aij->B; 1422 PetscErrorCode ierr; 1423 int s1,s2,s3; 1424 1425 PetscFunctionBegin; 1426 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1427 if (rr) { 1428 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1429 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1430 /* Overlap communication with computation. */ 1431 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1432 } 1433 if (ll) { 1434 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1435 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1436 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1437 } 1438 /* scale the diagonal block */ 1439 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1440 1441 if (rr) { 1442 /* Do a scatter end and then right scale the off-diagonal block */ 1443 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1444 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1445 } 1446 1447 PetscFunctionReturn(0); 1448 } 1449 1450 1451 #undef __FUNCT__ 1452 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1453 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A) 1454 { 1455 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1456 PetscErrorCode ierr; 1457 1458 PetscFunctionBegin; 1459 if (!a->rank) { 1460 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1461 } 1462 PetscFunctionReturn(0); 1463 } 1464 1465 #undef __FUNCT__ 1466 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1467 PetscErrorCode MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1468 { 1469 PetscFunctionBegin; 1470 *bs = 1; 1471 PetscFunctionReturn(0); 1472 } 1473 #undef __FUNCT__ 1474 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1475 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1476 { 1477 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1478 PetscErrorCode ierr; 1479 1480 PetscFunctionBegin; 1481 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1482 PetscFunctionReturn(0); 1483 } 1484 1485 #undef __FUNCT__ 1486 #define __FUNCT__ "MatEqual_MPIAIJ" 1487 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1488 { 1489 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1490 Mat a,b,c,d; 1491 PetscTruth flg; 1492 PetscErrorCode ierr; 1493 1494 PetscFunctionBegin; 1495 a = matA->A; b = matA->B; 1496 c = matB->A; d = matB->B; 1497 1498 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1499 if (flg == PETSC_TRUE) { 1500 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1501 } 1502 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1503 PetscFunctionReturn(0); 1504 } 1505 1506 #undef __FUNCT__ 1507 #define __FUNCT__ "MatCopy_MPIAIJ" 1508 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1509 { 1510 PetscErrorCode ierr; 1511 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1512 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1513 1514 PetscFunctionBegin; 1515 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1516 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1517 /* because of the column compression in the off-processor part of the matrix a->B, 1518 the number of columns in a->B and b->B may be different, hence we cannot call 1519 the MatCopy() directly on the two parts. 
If need be, we can provide a more 1520 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1521 then copying the submatrices */ 1522 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1523 } else { 1524 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1525 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1526 } 1527 PetscFunctionReturn(0); 1528 } 1529 1530 #undef __FUNCT__ 1531 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1532 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A) 1533 { 1534 PetscErrorCode ierr; 1535 1536 PetscFunctionBegin; 1537 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1538 PetscFunctionReturn(0); 1539 } 1540 1541 #include "petscblaslapack.h" 1542 #undef __FUNCT__ 1543 #define __FUNCT__ "MatAXPY_MPIAIJ" 1544 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str) 1545 { 1546 PetscErrorCode ierr; 1547 int one=1,i; 1548 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1549 Mat_SeqAIJ *x,*y; 1550 1551 PetscFunctionBegin; 1552 if (str == SAME_NONZERO_PATTERN) { 1553 x = (Mat_SeqAIJ *)xx->A->data; 1554 y = (Mat_SeqAIJ *)yy->A->data; 1555 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1556 x = (Mat_SeqAIJ *)xx->B->data; 1557 y = (Mat_SeqAIJ *)yy->B->data; 1558 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1559 } else if (str == SUBSET_NONZERO_PATTERN) { 1560 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1561 1562 x = (Mat_SeqAIJ *)xx->B->data; 1563 y = (Mat_SeqAIJ *)yy->B->data; 1564 if (y->xtoy && y->XtoY != xx->B) { 1565 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1566 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1567 } 1568 if (!y->xtoy) { /* get xtoy */ 1569 ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 1570 y->XtoY = xx->B; 1571 } 1572 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]); 1573 } else { 1574 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1575 } 1576 PetscFunctionReturn(0); 1577 } 1578 1579 /* -------------------------------------------------------------------*/ 1580 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1581 MatGetRow_MPIAIJ, 1582 MatRestoreRow_MPIAIJ, 1583 MatMult_MPIAIJ, 1584 /* 4*/ MatMultAdd_MPIAIJ, 1585 MatMultTranspose_MPIAIJ, 1586 MatMultTransposeAdd_MPIAIJ, 1587 0, 1588 0, 1589 0, 1590 /*10*/ 0, 1591 0, 1592 0, 1593 MatRelax_MPIAIJ, 1594 MatTranspose_MPIAIJ, 1595 /*15*/ MatGetInfo_MPIAIJ, 1596 MatEqual_MPIAIJ, 1597 MatGetDiagonal_MPIAIJ, 1598 MatDiagonalScale_MPIAIJ, 1599 MatNorm_MPIAIJ, 1600 /*20*/ MatAssemblyBegin_MPIAIJ, 1601 MatAssemblyEnd_MPIAIJ, 1602 0, 1603 MatSetOption_MPIAIJ, 1604 MatZeroEntries_MPIAIJ, 1605 /*25*/ MatZeroRows_MPIAIJ, 1606 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1607 MatLUFactorSymbolic_MPIAIJ_TFS, 1608 #else 1609 0, 1610 #endif 1611 0, 1612 0, 1613 0, 1614 /*30*/ MatSetUpPreallocation_MPIAIJ, 1615 0, 1616 0, 1617 0, 1618 0, 1619 /*35*/ MatDuplicate_MPIAIJ, 1620 0, 1621 0, 1622 0, 1623 0, 1624 /*40*/ MatAXPY_MPIAIJ, 1625 MatGetSubMatrices_MPIAIJ, 1626 MatIncreaseOverlap_MPIAIJ, 1627 MatGetValues_MPIAIJ, 1628 MatCopy_MPIAIJ, 1629 /*45*/ MatPrintHelp_MPIAIJ, 1630 MatScale_MPIAIJ, 1631 0, 1632 0, 1633 0, 1634 /*50*/ MatGetBlockSize_MPIAIJ, 1635 0, 1636 0, 1637 0, 1638 0, 1639 /*55*/ MatFDColoringCreate_MPIAIJ, 1640 0, 1641 MatSetUnfactored_MPIAIJ, 1642 0, 1643 0, 1644 /*60*/ MatGetSubMatrix_MPIAIJ, 1645 MatDestroy_MPIAIJ, 1646 MatView_MPIAIJ, 1647 MatGetPetscMaps_Petsc, 1648 0, 1649 /*65*/ 0, 1650 0, 
1651 0, 1652 0, 1653 0, 1654 /*70*/ 0, 1655 0, 1656 MatSetColoring_MPIAIJ, 1657 MatSetValuesAdic_MPIAIJ, 1658 MatSetValuesAdifor_MPIAIJ, 1659 /*75*/ 0, 1660 0, 1661 0, 1662 0, 1663 0, 1664 /*80*/ 0, 1665 0, 1666 0, 1667 0, 1668 /*85*/ MatLoad_MPIAIJ, 1669 0, 1670 0, 1671 0, 1672 0, 1673 /*90*/ 0, 1674 MatMatMult_MPIAIJ_MPIAIJ, 1675 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 1676 MatMatMultNumeric_MPIAIJ_MPIAIJ, 1677 }; 1678 1679 /* ----------------------------------------------------------------------------------------*/ 1680 1681 EXTERN_C_BEGIN 1682 #undef __FUNCT__ 1683 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1684 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 1685 { 1686 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1687 PetscErrorCode ierr; 1688 1689 PetscFunctionBegin; 1690 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1691 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1692 PetscFunctionReturn(0); 1693 } 1694 EXTERN_C_END 1695 1696 EXTERN_C_BEGIN 1697 #undef __FUNCT__ 1698 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1699 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 1700 { 1701 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1702 PetscErrorCode ierr; 1703 1704 PetscFunctionBegin; 1705 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1706 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1707 PetscFunctionReturn(0); 1708 } 1709 EXTERN_C_END 1710 1711 #include "petscpc.h" 1712 EXTERN_C_BEGIN 1713 #undef __FUNCT__ 1714 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 1715 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 1716 { 1717 Mat_MPIAIJ *b; 1718 PetscErrorCode ierr; 1719 int i; 1720 1721 PetscFunctionBegin; 1722 B->preallocated = PETSC_TRUE; 1723 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 1724 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 1725 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 1726 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 1727 if (d_nnz) { 1728 for (i=0; i<B->m; i++) { 1729 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 1730 } 1731 } 1732 if (o_nnz) { 1733 for (i=0; i<B->m; i++) { 1734 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 1735 } 1736 } 1737 b = (Mat_MPIAIJ*)B->data; 1738 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 1739 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 1740 1741 PetscFunctionReturn(0); 1742 } 1743 EXTERN_C_END 1744 1745 /*MC 1746 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 1747 1748 Options Database Keys: 1749 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 1750 1751 Level: beginner 1752 1753 .seealso: MatCreateMPIAIJ 1754 M*/ 1755 1756 EXTERN_C_BEGIN 1757 #undef __FUNCT__ 1758 #define __FUNCT__ "MatCreate_MPIAIJ" 1759 PetscErrorCode MatCreate_MPIAIJ(Mat B) 1760 { 1761 Mat_MPIAIJ *b; 1762 PetscErrorCode ierr; 1763 int i,size; 1764 1765 PetscFunctionBegin; 1766 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1767 1768 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1769 B->data = (void*)b; 1770 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1771 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1772 B->factor = 0; 1773 B->assembled = PETSC_FALSE; 1774 B->mapping = 0; 1775 1776 B->insertmode = NOT_SET_VALUES; 1777 b->size = size; 1778 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1779 1780 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1781 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1782 1783 /* the information in the maps duplicates the information computed below, eventually 1784 we should remove the duplicate information that is not contained in the maps */ 1785 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1786 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1787 1788 /* build local table of row and column ownerships */ 1789 ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1790 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1791 b->cowners = b->rowners + b->size + 2; 1792 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1793 b->rowners[0] = 0; 1794 for (i=2; i<=b->size; i++) { 1795 b->rowners[i] += b->rowners[i-1]; 1796 } 1797 b->rstart = b->rowners[b->rank]; 1798 b->rend = b->rowners[b->rank+1]; 1799 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1800 b->cowners[0] = 0; 1801 for (i=2; i<=b->size; i++) { 1802 b->cowners[i] += b->cowners[i-1]; 1803 } 1804 b->cstart = b->cowners[b->rank]; 1805 b->cend = b->cowners[b->rank+1]; 1806 1807 /* build cache for off array entries formed */ 1808 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1809 b->donotstash = PETSC_FALSE; 1810 b->colmap = 0; 1811 b->garray = 0; 1812 b->roworiented = PETSC_TRUE; 1813 1814 /* stuff used for matrix vector multiply */ 1815 b->lvec = PETSC_NULL; 1816 b->Mvctx = PETSC_NULL; 1817 1818 /* stuff for MatGetRow() */ 1819 b->rowindices = 0; 1820 b->rowvalues = 0; 1821 b->getrowactive = PETSC_FALSE; 1822 1823 /* Explicitly create 2 MATSEQAIJ matrices. 
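      b->A will hold the locally owned rows restricted to the locally owned columns
      (the m x n "diagonal" block), while b->B will hold the remaining entries of the
      local rows (the "off-diagonal" block, created m x N below and later compacted so
      that only columns containing nonzeros are kept, indexed through garray).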
*/ 1824 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr); 1825 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 1826 PetscLogObjectParent(B,b->A); 1827 ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr); 1828 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 1829 PetscLogObjectParent(B,b->B); 1830 1831 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1832 "MatStoreValues_MPIAIJ", 1833 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1834 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1835 "MatRetrieveValues_MPIAIJ", 1836 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1837 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1838 "MatGetDiagonalBlock_MPIAIJ", 1839 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1840 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 1841 "MatIsTranspose_MPIAIJ", 1842 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 1843 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 1844 "MatMPIAIJSetPreallocation_MPIAIJ", 1845 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 1846 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 1847 "MatDiagonalScaleLocal_MPIAIJ", 1848 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 1849 PetscFunctionReturn(0); 1850 } 1851 EXTERN_C_END 1852 1853 /*MC 1854 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 1855 1856 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 1857 and MATMPIAIJ otherwise. As a result, for single process communicators, 1858 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 1859 for communicators controlling multiple processes. It is recommended that you call both of 1860 the above preallocation routines for simplicity. 1861 1862 Options Database Keys: 1863 . 
-mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 1864 1865 Level: beginner 1866 1867 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ 1868 M*/ 1869 1870 EXTERN_C_BEGIN 1871 #undef __FUNCT__ 1872 #define __FUNCT__ "MatCreate_AIJ" 1873 PetscErrorCode MatCreate_AIJ(Mat A) 1874 { 1875 PetscErrorCode ierr,size; 1876 1877 PetscFunctionBegin; 1878 ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr); 1879 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 1880 if (size == 1) { 1881 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 1882 } else { 1883 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1884 } 1885 PetscFunctionReturn(0); 1886 } 1887 EXTERN_C_END 1888 1889 #undef __FUNCT__ 1890 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1891 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1892 { 1893 Mat mat; 1894 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1895 PetscErrorCode ierr; 1896 1897 PetscFunctionBegin; 1898 *newmat = 0; 1899 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1900 ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr); 1901 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1902 a = (Mat_MPIAIJ*)mat->data; 1903 1904 mat->factor = matin->factor; 1905 mat->assembled = PETSC_TRUE; 1906 mat->insertmode = NOT_SET_VALUES; 1907 mat->preallocated = PETSC_TRUE; 1908 1909 a->rstart = oldmat->rstart; 1910 a->rend = oldmat->rend; 1911 a->cstart = oldmat->cstart; 1912 a->cend = oldmat->cend; 1913 a->size = oldmat->size; 1914 a->rank = oldmat->rank; 1915 a->donotstash = oldmat->donotstash; 1916 a->roworiented = oldmat->roworiented; 1917 a->rowindices = 0; 1918 a->rowvalues = 0; 1919 a->getrowactive = PETSC_FALSE; 1920 1921 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1922 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1923 if (oldmat->colmap) { 1924 #if defined (PETSC_USE_CTABLE) 1925 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1926 #else 1927 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1928 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1929 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1930 #endif 1931 } else a->colmap = 0; 1932 if (oldmat->garray) { 1933 int len; 1934 len = oldmat->B->n; 1935 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1936 PetscLogObjectMemory(mat,len*sizeof(int)); 1937 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1938 } else a->garray = 0; 1939 1940 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1941 PetscLogObjectParent(mat,a->lvec); 1942 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1943 PetscLogObjectParent(mat,a->Mvctx); 1944 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1945 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1946 PetscLogObjectParent(mat,a->A); 1947 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1948 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1949 PetscLogObjectParent(mat,a->B); 1950 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1951 *newmat = mat; 1952 PetscFunctionReturn(0); 1953 } 1954 1955 #include "petscsys.h" 1956 1957 #undef __FUNCT__ 1958 #define __FUNCT__ "MatLoad_MPIAIJ" 1959 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat) 1960 { 1961 Mat A; 1962 PetscScalar *vals,*svals; 1963 MPI_Comm 
comm = ((PetscObject)viewer)->comm; 1964 MPI_Status status; 1965 int i,nz,ierr,j,rstart,rend,fd; 1966 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1967 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1968 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1969 1970 PetscFunctionBegin; 1971 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1972 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1973 if (!rank) { 1974 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1975 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1976 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1977 if (header[3] < 0) { 1978 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 1979 } 1980 } 1981 1982 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1983 M = header[1]; N = header[2]; 1984 /* determine ownership of all rows */ 1985 m = M/size + ((M % size) > rank); 1986 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1987 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1988 rowners[0] = 0; 1989 for (i=2; i<=size; i++) { 1990 rowners[i] += rowners[i-1]; 1991 } 1992 rstart = rowners[rank]; 1993 rend = rowners[rank+1]; 1994 1995 /* distribute row lengths to all processors */ 1996 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr); 1997 offlens = ourlens + (rend-rstart); 1998 if (!rank) { 1999 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 2000 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2001 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 2002 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 2003 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 2004 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2005 } else { 2006 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 2007 } 2008 2009 if (!rank) { 2010 /* calculate the number of nonzeros on each processor */ 2011 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 2012 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2013 for (i=0; i<size; i++) { 2014 for (j=rowners[i]; j< rowners[i+1]; j++) { 2015 procsnz[i] += rowlengths[j]; 2016 } 2017 } 2018 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2019 2020 /* determine max buffer needed and allocate it */ 2021 maxnz = 0; 2022 for (i=0; i<size; i++) { 2023 maxnz = PetscMax(maxnz,procsnz[i]); 2024 } 2025 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 2026 2027 /* read in my part of the matrix column indices */ 2028 nz = procsnz[0]; 2029 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 2030 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2031 2032 /* read in every one elses and ship off */ 2033 for (i=1; i<size; i++) { 2034 nz = procsnz[i]; 2035 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2036 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2037 } 2038 ierr = PetscFree(cols);CHKERRQ(ierr); 2039 } else { 2040 /* determine buffer space needed for message */ 2041 nz = 0; 2042 for (i=0; i<m; i++) { 2043 nz += ourlens[i]; 2044 } 2045 ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr); 2046 2047 /* receive message of column indices*/ 2048 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2049 ierr = 
MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2050 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2051 } 2052 2053 /* determine column ownership if matrix is not square */ 2054 if (N != M) { 2055 n = N/size + ((N % size) > rank); 2056 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2057 cstart = cend - n; 2058 } else { 2059 cstart = rstart; 2060 cend = rend; 2061 n = cend - cstart; 2062 } 2063 2064 /* loop over local rows, determining number of off diagonal entries */ 2065 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 2066 jj = 0; 2067 for (i=0; i<m; i++) { 2068 for (j=0; j<ourlens[i]; j++) { 2069 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2070 jj++; 2071 } 2072 } 2073 2074 /* create our matrix */ 2075 for (i=0; i<m; i++) { 2076 ourlens[i] -= offlens[i]; 2077 } 2078 ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr); 2079 ierr = MatSetType(A,type);CHKERRQ(ierr); 2080 ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr); 2081 2082 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2083 for (i=0; i<m; i++) { 2084 ourlens[i] += offlens[i]; 2085 } 2086 2087 if (!rank) { 2088 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2089 2090 /* read in my part of the matrix numerical values */ 2091 nz = procsnz[0]; 2092 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2093 2094 /* insert into matrix */ 2095 jj = rstart; 2096 smycols = mycols; 2097 svals = vals; 2098 for (i=0; i<m; i++) { 2099 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2100 smycols += ourlens[i]; 2101 svals += ourlens[i]; 2102 jj++; 2103 } 2104 2105 /* read in other processors and ship out */ 2106 for (i=1; i<size; i++) { 2107 nz = procsnz[i]; 2108 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2109 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2110 } 2111 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2112 } else { 2113 /* receive numeric values */ 2114 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 2115 2116 /* receive message of values*/ 2117 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2118 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2119 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 2120 2121 /* insert into matrix */ 2122 jj = rstart; 2123 smycols = mycols; 2124 svals = vals; 2125 for (i=0; i<m; i++) { 2126 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2127 smycols += ourlens[i]; 2128 svals += ourlens[i]; 2129 jj++; 2130 } 2131 } 2132 ierr = PetscFree(ourlens);CHKERRQ(ierr); 2133 ierr = PetscFree(vals);CHKERRQ(ierr); 2134 ierr = PetscFree(mycols);CHKERRQ(ierr); 2135 ierr = PetscFree(rowners);CHKERRQ(ierr); 2136 2137 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2138 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2139 *newmat = A; 2140 PetscFunctionReturn(0); 2141 } 2142 2143 #undef __FUNCT__ 2144 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2145 /* 2146 Not great since it makes two copies of the submatrix, first an SeqAIJ 2147 in local and then by concatenating the local matrices the end result. 
2148 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2149 */ 2150 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 2151 { 2152 PetscErrorCode ierr; 2153 int i,m,n,rstart,row,rend,nz,*cwork,size,rank,j; 2154 int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2155 Mat *local,M,Mreuse; 2156 PetscScalar *vwork,*aa; 2157 MPI_Comm comm = mat->comm; 2158 Mat_SeqAIJ *aij; 2159 2160 2161 PetscFunctionBegin; 2162 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2163 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2164 2165 if (call == MAT_REUSE_MATRIX) { 2166 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2167 if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse"); 2168 local = &Mreuse; 2169 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2170 } else { 2171 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2172 Mreuse = *local; 2173 ierr = PetscFree(local);CHKERRQ(ierr); 2174 } 2175 2176 /* 2177 m - number of local rows 2178 n - number of columns (same on all processors) 2179 rstart - first row in new global matrix generated 2180 */ 2181 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2182 if (call == MAT_INITIAL_MATRIX) { 2183 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2184 ii = aij->i; 2185 jj = aij->j; 2186 2187 /* 2188 Determine the number of non-zeros in the diagonal and off-diagonal 2189 portions of the matrix in order to do correct preallocation 2190 */ 2191 2192 /* first get start and end of "diagonal" columns */ 2193 if (csize == PETSC_DECIDE) { 2194 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2195 if (mglobal == n) { /* square matrix */ 2196 nlocal = m; 2197 } else { 2198 nlocal = n/size + ((n % size) > rank); 2199 } 2200 } else { 2201 nlocal = csize; 2202 } 2203 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2204 rstart = rend - nlocal; 2205 if (rank == size - 1 && rend != n) { 2206 SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n); 2207 } 2208 2209 /* next, compute all the lengths */ 2210 ierr = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2211 olens = dlens + m; 2212 for (i=0; i<m; i++) { 2213 jend = ii[i+1] - ii[i]; 2214 olen = 0; 2215 dlen = 0; 2216 for (j=0; j<jend; j++) { 2217 if (*jj < rstart || *jj >= rend) olen++; 2218 else dlen++; 2219 jj++; 2220 } 2221 olens[i] = olen; 2222 dlens[i] = dlen; 2223 } 2224 ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr); 2225 ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr); 2226 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 2227 ierr = PetscFree(dlens);CHKERRQ(ierr); 2228 } else { 2229 int ml,nl; 2230 2231 M = *newmat; 2232 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2233 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2234 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2235 /* 2236 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2237 rather than the slower MatSetValues(). 
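       (That is, the matrix is flagged as having been assembled before, but as not
       currently assembled, which is the state in which values can be inserted into an
       already-existing parallel nonzero structure.)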
2238 */ 2239 M->was_assembled = PETSC_TRUE; 2240 M->assembled = PETSC_FALSE; 2241 } 2242 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2243 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2244 ii = aij->i; 2245 jj = aij->j; 2246 aa = aij->a; 2247 for (i=0; i<m; i++) { 2248 row = rstart + i; 2249 nz = ii[i+1] - ii[i]; 2250 cwork = jj; jj += nz; 2251 vwork = aa; aa += nz; 2252 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2253 } 2254 2255 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2256 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2257 *newmat = M; 2258 2259 /* save submatrix used in processor for next request */ 2260 if (call == MAT_INITIAL_MATRIX) { 2261 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2262 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2263 } 2264 2265 PetscFunctionReturn(0); 2266 } 2267 2268 #undef __FUNCT__ 2269 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2270 /*@C 2271 MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format 2272 (the default parallel PETSc format). For good matrix assembly performance 2273 the user should preallocate the matrix storage by setting the parameters 2274 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2275 performance can be increased by more than a factor of 50. 2276 2277 Collective on MPI_Comm 2278 2279 Input Parameters: 2280 + A - the matrix 2281 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2282 (same value is used for all local rows) 2283 . d_nnz - array containing the number of nonzeros in the various rows of the 2284 DIAGONAL portion of the local submatrix (possibly different for each row) 2285 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2286 The size of this array is equal to the number of local rows, i.e 'm'. 2287 You must leave room for the diagonal entry even if it is zero. 2288 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2289 submatrix (same value is used for all local rows). 2290 - o_nnz - array containing the number of nonzeros in the various rows of the 2291 OFF-DIAGONAL portion of the local submatrix (possibly different for 2292 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2293 structure. The size of this array is equal to the number 2294 of local rows, i.e 'm'. 2295 2296 The AIJ format (also called the Yale sparse matrix format or 2297 compressed row storage), is fully compatible with standard Fortran 77 2298 storage. That is, the stored row and column indices can begin at 2299 either one (as in Fortran) or zero. See the users manual for details. 2300 2301 The user MUST specify either the local or global matrix dimensions 2302 (possibly both). 2303 2304 The parallel matrix is partitioned such that the first m0 rows belong to 2305 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2306 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2307 2308 The DIAGONAL portion of the local submatrix of a processor can be defined 2309 as the submatrix which is obtained by extraction the part corresponding 2310 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2311 first row that belongs to the processor, and r2 is the last row belonging 2312 to the this processor. This is a square mxm matrix. The remaining portion 2313 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 
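   For a given process this means the columns of each locally owned row are split as
   sketched below, writing r1 and r2 for the first and last locally owned row (and
   column) indices:

.vb
     global column:    0 .. r1-1   |   r1 .. r2   |  r2+1 .. N-1
     stored in:       OFF-DIAGONAL |   DIAGONAL   |  OFF-DIAGONAL
.ve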
   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 nonzero values, that is
   assembled across 3 processors. Let us assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are:

.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve

   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:

.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve

   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.
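   A typical calling sequence (a sketch only; m,n,M,N and the d_nnz,o_nnz arrays are
   placeholders for the caller's own sizes and preallocation data, following the
   MatCreate()/MatSetType() construction mechanism used elsewhere in this file):

.vb
      ierr = MatCreate(comm,m,n,M,N,&B);CHKERRQ(ierr);
      ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve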
2396 2397 Level: intermediate 2398 2399 .keywords: matrix, aij, compressed row, sparse, parallel 2400 2401 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2402 @*/ 2403 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[]) 2404 { 2405 PetscErrorCode ierr,(*f)(Mat,int,const int[],int,const int[]); 2406 2407 PetscFunctionBegin; 2408 ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2409 if (f) { 2410 ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2411 } 2412 PetscFunctionReturn(0); 2413 } 2414 2415 #undef __FUNCT__ 2416 #define __FUNCT__ "MatCreateMPIAIJ" 2417 /*@C 2418 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2419 (the default parallel PETSc format). For good matrix assembly performance 2420 the user should preallocate the matrix storage by setting the parameters 2421 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2422 performance can be increased by more than a factor of 50. 2423 2424 Collective on MPI_Comm 2425 2426 Input Parameters: 2427 + comm - MPI communicator 2428 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2429 This value should be the same as the local size used in creating the 2430 y vector for the matrix-vector product y = Ax. 2431 . n - This value should be the same as the local size used in creating the 2432 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2433 calculated if N is given) For square matrices n is almost always m. 2434 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2435 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2436 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2437 (same value is used for all local rows) 2438 . d_nnz - array containing the number of nonzeros in the various rows of the 2439 DIAGONAL portion of the local submatrix (possibly different for each row) 2440 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2441 The size of this array is equal to the number of local rows, i.e 'm'. 2442 You must leave room for the diagonal entry even if it is zero. 2443 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2444 submatrix (same value is used for all local rows). 2445 - o_nnz - array containing the number of nonzeros in the various rows of the 2446 OFF-DIAGONAL portion of the local submatrix (possibly different for 2447 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2448 structure. The size of this array is equal to the number 2449 of local rows, i.e 'm'. 2450 2451 Output Parameter: 2452 . A - the matrix 2453 2454 Notes: 2455 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2456 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2457 storage requirements for this matrix. 2458 2459 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2460 processor than it must be used on all processors that share the object for 2461 that argument. 2462 2463 The AIJ format (also called the Yale sparse matrix format or 2464 compressed row storage), is fully compatible with standard Fortran 77 2465 storage. That is, the stored row and column indices can begin at 2466 either one (as in Fortran) or zero. See the users manual for details. 
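   As an illustration (worked out in detail in the example further below), the 8x8 matrix
   distributed over 3 processes could be created with a call of the form

.vb
      ierr = MatCreateMPIAIJ(comm,m,n,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve

   where m,n,d_nnz,o_nnz stand for each process's own local sizes and preallocation arrays.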
2467 2468 The user MUST specify either the local or global matrix dimensions 2469 (possibly both). 2470 2471 The parallel matrix is partitioned such that the first m0 rows belong to 2472 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2473 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2474 2475 The DIAGONAL portion of the local submatrix of a processor can be defined 2476 as the submatrix which is obtained by extraction the part corresponding 2477 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2478 first row that belongs to the processor, and r2 is the last row belonging 2479 to the this processor. This is a square mxm matrix. The remaining portion 2480 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2481 2482 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2483 2484 When calling this routine with a single process communicator, a matrix of 2485 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 2486 type of communicator, use the construction mechanism: 2487 MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...); 2488 2489 By default, this format uses inodes (identical nodes) when possible. 2490 We search for consecutive rows with the same nonzero structure, thereby 2491 reusing matrix information to achieve increased efficiency. 2492 2493 Options Database Keys: 2494 + -mat_aij_no_inode - Do not use inodes 2495 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2496 - -mat_aij_oneindex - Internally use indexing starting at 1 2497 rather than 0. Note that when calling MatSetValues(), 2498 the user still MUST index entries starting at 0! 2499 2500 2501 Example usage: 2502 2503 Consider the following 8x8 matrix with 34 non-zero values, that is 2504 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2505 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2506 as follows: 2507 2508 .vb 2509 1 2 0 | 0 3 0 | 0 4 2510 Proc0 0 5 6 | 7 0 0 | 8 0 2511 9 0 10 | 11 0 0 | 12 0 2512 ------------------------------------- 2513 13 0 14 | 15 16 17 | 0 0 2514 Proc1 0 18 0 | 19 20 21 | 0 0 2515 0 0 0 | 22 23 0 | 24 0 2516 ------------------------------------- 2517 Proc2 25 26 27 | 0 0 28 | 29 0 2518 30 0 0 | 31 32 33 | 0 34 2519 .ve 2520 2521 This can be represented as a collection of submatrices as: 2522 2523 .vb 2524 A B C 2525 D E F 2526 G H I 2527 .ve 2528 2529 Where the submatrices A,B,C are owned by proc0, D,E,F are 2530 owned by proc1, G,H,I are owned by proc2. 2531 2532 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2533 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2534 The 'M','N' parameters are 8,8, and have the same values on all procs. 2535 2536 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2537 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2538 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2539 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2540 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2541 matrix, ans [DF] as another SeqAIJ matrix. 2542 2543 When d_nz, o_nz parameters are specified, d_nz storage elements are 2544 allocated for every row of the local diagonal submatrix, and o_nz 2545 storage locations are allocated for every row of the OFF-DIAGONAL submat. 
2546 One way to choose d_nz and o_nz is to use the max nonzerors per local 2547 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2548 In this case, the values of d_nz,o_nz are: 2549 .vb 2550 proc0 : dnz = 2, o_nz = 2 2551 proc1 : dnz = 3, o_nz = 2 2552 proc2 : dnz = 1, o_nz = 4 2553 .ve 2554 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2555 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2556 for proc3. i.e we are using 12+15+10=37 storage locations to store 2557 34 values. 2558 2559 When d_nnz, o_nnz parameters are specified, the storage is specified 2560 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2561 In the above case the values for d_nnz,o_nnz are: 2562 .vb 2563 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2564 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2565 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2566 .ve 2567 Here the space allocated is sum of all the above values i.e 34, and 2568 hence pre-allocation is perfect. 2569 2570 Level: intermediate 2571 2572 .keywords: matrix, aij, compressed row, sparse, parallel 2573 2574 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2575 @*/ 2576 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A) 2577 { 2578 PetscErrorCode ierr,size; 2579 2580 PetscFunctionBegin; 2581 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2582 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2583 if (size > 1) { 2584 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2585 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2586 } else { 2587 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2588 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2589 } 2590 PetscFunctionReturn(0); 2591 } 2592 2593 #undef __FUNCT__ 2594 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2595 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[]) 2596 { 2597 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2598 PetscFunctionBegin; 2599 *Ad = a->A; 2600 *Ao = a->B; 2601 *colmap = a->garray; 2602 PetscFunctionReturn(0); 2603 } 2604 2605 #undef __FUNCT__ 2606 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2607 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2608 { 2609 PetscErrorCode ierr; 2610 int i; 2611 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2612 2613 PetscFunctionBegin; 2614 if (coloring->ctype == IS_COLORING_LOCAL) { 2615 ISColoringValue *allcolors,*colors; 2616 ISColoring ocoloring; 2617 2618 /* set coloring for diagonal portion */ 2619 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2620 2621 /* set coloring for off-diagonal portion */ 2622 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2623 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2624 for (i=0; i<a->B->n; i++) { 2625 colors[i] = allcolors[a->garray[i]]; 2626 } 2627 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2628 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2629 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2630 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2631 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2632 ISColoringValue *colors; 2633 int *larray; 2634 ISColoring ocoloring; 2635 2636 /* set coloring for diagonal portion */ 2637 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2638 for (i=0; i<a->A->n; i++) { 2639 larray[i] = i + a->cstart; 
2640 } 2641 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2642 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2643 for (i=0; i<a->A->n; i++) { 2644 colors[i] = coloring->colors[larray[i]]; 2645 } 2646 ierr = PetscFree(larray);CHKERRQ(ierr); 2647 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2648 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2649 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2650 2651 /* set coloring for off-diagonal portion */ 2652 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2653 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2654 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2655 for (i=0; i<a->B->n; i++) { 2656 colors[i] = coloring->colors[larray[i]]; 2657 } 2658 ierr = PetscFree(larray);CHKERRQ(ierr); 2659 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2660 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2661 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2662 } else { 2663 SETERRQ1(1,"No support ISColoringType %d",coloring->ctype); 2664 } 2665 2666 PetscFunctionReturn(0); 2667 } 2668 2669 #undef __FUNCT__ 2670 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2671 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2672 { 2673 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2674 PetscErrorCode ierr; 2675 2676 PetscFunctionBegin; 2677 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2678 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2679 PetscFunctionReturn(0); 2680 } 2681 2682 #undef __FUNCT__ 2683 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2684 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues) 2685 { 2686 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2687 PetscErrorCode ierr; 2688 2689 PetscFunctionBegin; 2690 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2691 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2692 PetscFunctionReturn(0); 2693 } 2694 2695 #undef __FUNCT__ 2696 #define __FUNCT__ "MatMerge" 2697 /*@C 2698 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2699 matrices from each processor 2700 2701 Collective on MPI_Comm 2702 2703 Input Parameters: 2704 + comm - the communicators the parallel matrix will live on 2705 . inmat - the input sequential matrices 2706 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 2707 2708 Output Parameter: 2709 . outmat - the parallel matrix generated 2710 2711 Level: advanced 2712 2713 Notes: The number of columns of the matrix in EACH of the seperate files 2714 MUST be the same. 
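   Also note that this routine calls MatDestroy() on inmat once its entries have been
   copied into outmat.

   A sketch of the intended usage (Aloc and its construction are placeholders; each
   process builds its own sequential matrix, e.g. with MatCreateSeqAIJ()):

.vb
      Mat Aloc,Apar;
      ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,mlocal,n,0,nnz,&Aloc);CHKERRQ(ierr);
      ... fill Aloc with MatSetValues() and assemble it ...
      ierr = MatMerge(comm,Aloc,MAT_INITIAL_MATRIX,&Apar);CHKERRQ(ierr);
.ve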
2715 2716 @*/ 2717 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,MatReuse scall,Mat *outmat) 2718 { 2719 PetscErrorCode ierr; 2720 int m,n,i,rstart,nnz,I,*dnz,*onz; 2721 const int *indx; 2722 const PetscScalar *values; 2723 PetscMap columnmap,rowmap; 2724 2725 PetscFunctionBegin; 2726 2727 ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr); 2728 if (scall == MAT_INITIAL_MATRIX){ 2729 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2730 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2731 ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr); 2732 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2733 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2734 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2735 2736 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2737 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2738 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2739 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2740 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2741 2742 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2743 for (i=0;i<m;i++) { 2744 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2745 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2746 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2747 } 2748 /* This routine will ONLY return MPIAIJ type matrix */ 2749 ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr); 2750 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 2751 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 2752 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2753 2754 } else if (scall == MAT_REUSE_MATRIX){ 2755 ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 2756 } else { 2757 SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall); 2758 } 2759 2760 for (i=0;i<m;i++) { 2761 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2762 I = i + rstart; 2763 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2764 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2765 } 2766 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2767 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2768 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2769 2770 PetscFunctionReturn(0); 2771 } 2772 2773 #undef __FUNCT__ 2774 #define __FUNCT__ "MatFileSplit" 2775 PetscErrorCode MatFileSplit(Mat A,char *outfile) 2776 { 2777 PetscErrorCode ierr; 2778 int rank,m,N,i,rstart,nnz; 2779 size_t len; 2780 const int *indx; 2781 PetscViewer out; 2782 char *name; 2783 Mat B; 2784 const PetscScalar *values; 2785 2786 PetscFunctionBegin; 2787 2788 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2789 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2790 /* Should this be the type of the diagonal block of A? 
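      (For an MPIAIJ input the diagonal block is itself a SeqAIJ matrix, so MATSEQAIJ is a
      reasonable choice here; a more general version could instead query the block through
      the MatGetDiagonalBlock_C method composed in MatCreate_MPIAIJ() above.)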
 */
  ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
  for (i=0;i<m;i++) {
    ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
    ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
  ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
  ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
  sprintf(name,"%s.%d",outfile,rank);
  ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
  ierr = MatView(B,out);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
  ierr = MatDestroy(B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPI"
/*@C
      MatMerge_SeqsToMPI - Creates a single large PETSc matrix by concatenating the
                 sequential matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    inmat - the input sequential matrices
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    outmat - the parallel matrix generated

    Level: advanced

   Notes: The number of columns of the matrix in EACH of the separate matrices
      MUST be the same.

@*/
PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat inmat,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  int            m,n,i,size,rank;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscPrintf(PETSC_COMM_SELF," [%d] call MatMerge_SeqsToMPI...\n",rank);CHKERRQ(ierr);

  /* Note: the actual merge is not yet implemented here; this routine currently
     only reports the call and queries the sizes of inmat */
  ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
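/*
   Illustrative sketch only (not compiled as part of this file): a minimal driver that
   creates, fills, and assembles a parallel tridiagonal MPIAIJ matrix using
   MatCreateMPIAIJ() and the MatSetValues()/assembly path implemented above.  All
   sizes and preallocation values are arbitrary placeholders.

      #include "petscmat.h"
      int main(int argc,char **argv)
      {
        Mat            A;
        PetscErrorCode ierr;
        int            i,rstart,rend,col[3];
        PetscScalar    v[3];

        ierr = PetscInitialize(&argc,&argv,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
        ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,8,8,3,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
        ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
        for (i=rstart; i<rend; i++) {
          v[0] = -1.0; v[1] = 2.0; v[2] = -1.0;
          col[0] = i-1; col[1] = i; col[2] = i+1;
          if (i == 0)      { ierr = MatSetValues(A,1,&i,2,col+1,v+1,INSERT_VALUES);CHKERRQ(ierr); }
          else if (i == 7) { ierr = MatSetValues(A,1,&i,2,col,v,INSERT_VALUES);CHKERRQ(ierr); }
          else             { ierr = MatSetValues(A,1,&i,3,col,v,INSERT_VALUES);CHKERRQ(ierr); }
        }
        ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
        ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
        ierr = MatDestroy(A);CHKERRQ(ierr);
        ierr = PetscFinalize();CHKERRQ(ierr);
        return 0;
      }
*/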