/*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
EXTERN int DisAssemble_MPIAIJ(Mat);
EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatPrintHelp_SeqAIJ(Mat);

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. When PETSC_USE_CTABLE is defined this is scalable
   at a slightly higher hash-table cost; without it, it is not scalable
   (each processor keeps an order-N integer array) but access is fast.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        n = aij->B->n,i,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

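/*
   Editorial note (sketch, not part of the original source): both colmap
   representations store "local index + 1" so that 0 can serve as "this
   global column is not present on this process". A lookup of global
   column gcol (placeholder name) therefore reads, exactly as done in
   MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below:

       #if defined (PETSC_USE_CTABLE)
         ierr = PetscTableFind(aij->colmap,gcol+1,&col);CHKERRQ(ierr);
         col--;                        (col is -1 when gcol is absent)
       #else
         col = aij->colmap[gcol] - 1;  (col is -1 when gcol is absent)
       #endif
*/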
#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
    \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col1) high = t; \
      else              low = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int         new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
      \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
      \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
      \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
      \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
      \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row]; \
    col1 = col - shift; \
    \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col1) high = t; \
      else              low = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int         new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
      \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
      \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
      \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
      for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
      \
      ierr = PetscFree(b->a);CHKERRQ(ierr); \
      if (!b->singlemalloc) { \
        ierr = PetscFree(b->i);CHKERRQ(ierr); \
        ierr = PetscFree(b->j);CHKERRQ(ierr); \
      } \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = PETSC_TRUE; \
      \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow; \
}

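/*
   Editorial note on the two macros above (summary, not original text):
   an entry is located by bisection plus a short linear scan; if the row
   is already full (nrow >= rmax) the whole matrix is reallocated as one
   block laid out as

       new_a[0 .. new_nz-1] | new_j[0 .. new_nz-1] | new_i[0 .. am]

   with CHUNKSIZE extra slots opened up after the current row, which is
   why singlemalloc is set to PETSC_TRUE afterwards. The nonew flag
   (set via options such as MAT_NO_NEW_NONZERO_LOCATIONS) instead makes
   the macro silently drop a new location (nonew == 1) or raise an error
   (nonew == -1, or nonew == -2 when a reallocation would be required).
*/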
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar value;
  int         ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int         cstart = aij->cstart,cend = aij->cend,row,col;
  PetscTruth  roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A = aij->A;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)A->data;
  int         *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar *aa = a->a;
  PetscTruth  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat         B = aij->B;
  Mat_SeqAIJ  *b = (Mat_SeqAIJ*)B->data;
  int         *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
  PetscScalar *ba = b->a;

  int         *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
  int         nonew = a->nonew,shift = a->indexshift;
  PetscScalar *ap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba    = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

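/*
   Editorial sketch (assumed typical usage, not part of the original
   source; grow, gcol and val are placeholder variables): callers pass
   global row/column indices, locally owned entries go straight into
   aij->A (diagonal block) or aij->B (off-diagonal block), and anything
   else is stashed until assembly:

       ierr = MatSetValues(mat,1,&grow,1,&gcol,&val,ADD_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/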
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr,nstash,reallocs;
  InsertMode addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted values while others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

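/*
   Editorial note (reasoning summary, not original text): the consistency
   check above relies on the usual InsertMode encoding, assumed here to be
   NOT_SET_VALUES = 0, INSERT_VALUES = 1, ADD_VALUES = 2. A bitwise OR
   across all ranks then equals ADD_VALUES|INSERT_VALUES exactly when both
   modes were used somewhere, and equals the single common mode otherwise
   (ranks that set nothing contribute 0 and do not disturb the result).
*/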
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  int         i,j,rstart,ncols,n,ierr,flg;
  int         *row,*col,other_disassembled;
  PetscScalar *val;
  InsertMode  addv = mat->insertmode;
#if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES)
  PetscTruth  flag;
#endif

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, and we can skip this step
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }
#if defined(PETSC_HAVE_SUPERLUDIST)
  ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); }
#endif

#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseSpooles_MPIAIJ(mat);CHKERRQ(ierr); }
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

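/*
   Editorial note (summary of the routine below, not original text):
   MatZeroRows_MPIAIJ() first routes every requested global row to its
   owning process (message counts via PetscMaxSum(), then MPI_Irecv and
   MPI_Isend of the row indices), and only then zeroes the locally owned
   rows, so each process may pass any subset of global rows in the IS.
*/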
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
{
  Mat_MPIAIJ  *l = (Mat_MPIAIJ*)A->data;
  int         i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int         *nprocs,j,idx,nsends,row;
  int         nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int         *rvalues,tag = A->tag,count,base,slen,n,*source;
  int         *lens,imdex,*lrows,*values,rstart=l->rstart;
  MPI_Comm    comm = A->comm;
  MPI_Request *send_waits,*recv_waits;
  MPI_Status  recv_status,*send_status;
  IS          istmp;
  PetscTruth  found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory, causing many mallocs and slowing
     things down.

     Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

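/*
   Editorial sketch (assumed typical usage with the signatures used in
   this file; "nlocal", "rows", "is" and "one" are placeholder variables):
   a caller zeroes a set of global rows and puts 1.0 on each diagonal with

       PetscScalar one = 1.0;
       ierr = ISCreateGeneral(PETSC_COMM_SELF,nlocal,rows,&is);CHKERRQ(ierr);
       ierr = MatZeroRows(A,is,&one);CHKERRQ(ierr);
       ierr = ISDestroy(is);CHKERRQ(ierr);

   Passing a null diagonal pointer leaves the zeroed rows entirely empty.
*/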
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in yy until the VecScatterEnd() below, which is true for
     this implementation but is perhaps not always true */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

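/*
   Editorial note (summary, not original text): the multiply routines
   above and below all use the same overlap pattern, communicating the
   ghost values needed by the off-diagonal block while the diagonal block
   computes:

       VecScatterBegin(x -> lvec)      start gathering ghost values
       y = A_diag * x_local            local work hides communication
       VecScatterEnd(x -> lvec)        finish the gather
       y += B_offdiag * lvec           consume the ghost values

   The transpose routines run the same scatter in SCATTER_REVERSE with
   ADD_VALUES, accumulating remote contributions instead of gathering.
*/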
#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in zz until the VecScatterEnd() below, which is true for
     this implementation but is perhaps not always true */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
int MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer);
extern int MatFactorInfo_Spooles(Mat,PetscViewer);

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ        *C = (Mat_SeqAIJ*)aij->A->data;
  int               ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,isascii,flg;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
      if (flg) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
                                                  rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
                                                  rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
#if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE)
      ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE)
      ierr = MatFactorInfo_Spooles(mat,viewer);CHKERRQ(ierr);
#endif
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    if (!rank) {
      ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = aij->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (isascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ*)matin->data;
  int         ierr;
  Vec         bb1;
  PetscScalar mone=-1.0;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

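/*
   Editorial note (summary of MatRelax_MPIAIJ() above, not original
   text): each sweep refreshes the ghost values of xx in mat->lvec, then
   folds the off-diagonal contribution into the right-hand side,
   bb1 = bb - B*xx_ghost (implemented as lvec = -lvec followed by
   bb1 = B*lvec + bb), so that the purely local SOR sweep on mat->A
   solves the correct local equations; only these processor-local
   variants are supported, not a true parallel SOR.
*/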
#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
  Mat        A = mat->A,B = mat->B;
  int        ierr;
  PetscReal  isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  info->columns_local     = (double)matin->N;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
  case MAT_USE_SINGLE_PRECISION_SOLVES:
    PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int         i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  int         nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  int         *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    int        max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

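/*
   Editorial note (summary, not original text): MatGetRow_MPIAIJ() above
   merges the two local rows back into global column order. Since B's
   columns are stored compressed and sorted, the merged row comes out as

       [ B entries with global column < cstart | A entries | remaining B entries ]

   which is globally sorted because all of A's columns map into the
   contiguous range [cstart,cend).
*/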
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ  *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  int         ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
  PetscReal   sum = 0.0;
  PetscScalar *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      int       *jj,*garray = aij->garray;
      ierr  = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr  = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->m; j++) {
        v   = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ  *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ  *Aloc = (Mat_SeqAIJ*)a->A->data;
  int         ierr,shift = Aloc->indexshift;
  int         M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat         B;
  PetscScalar *array;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  }

  ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*)a->A->data;
  m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*)a->B->data;
  m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row  = a->rstart;
  ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr);
  ct   = cols;
  for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  ierr = PetscFree(ct);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat        a = aij->A,b = aij->B;
  int        ierr,s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPrintHelp_MPIAIJ"
int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPIAIJ"
int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
int MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat        a,b,c,d;
  PetscTruth flg;
  int        ierr;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIAIJ"
int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
  PetscTruth flg;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
  if (str != SAME_NONZERO_PATTERN || !flg) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       MatCopy() directly on the two parts. If need be, a more efficient copy than
       MatCopy_Basic() could be provided by first uncompressing the a->B matrices
       and then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
int MatSetUpPreallocation_MPIAIJ(Mat A)
{
  int ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
#if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*);
#endif

#include "petscblaslapack.h"

#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str)
{
  int        ierr,one = 1;
  Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
  Mat_SeqAIJ *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    x = (Mat_SeqAIJ *)xx->A->data;
    y = (Mat_SeqAIJ *)yy->A->data;
    BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
  } else {
    ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
       MatMultAdd_MPIAIJ,
       MatMultTranspose_MPIAIJ,
       MatMultTransposeAdd_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
       0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
       MatZeroRows_MPIAIJ,
#if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
       MatLUFactorSymbolic_MPIAIJ_TFS,
#else
       0,
#endif
       0,
       0,
       0,
       MatSetUpPreallocation_MPIAIJ,
       0,
       0,
       0,
       0,
       MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
       MatAXPY_MPIAIJ,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       0,
       MatGetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
       MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       0,
       0,
       MatGetSubMatrix_MPIAIJ,
       MatDestroy_MPIAIJ,
       MatView_MPIAIJ,
       MatGetPetscMaps_Petsc,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatSetColoring_MPIAIJ,
       MatSetValuesAdic_MPIAIJ,
       MatSetValuesAdifor_MPIAIJ};

/* ----------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIAIJ"
int MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIAIJ"
int MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#include "petscpc.h"
EXTERN_C_BEGIN
EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
int MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  int        ierr,i,size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);

  ierr          = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data       = (void*)b;
  ierr          = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
  ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;
  B->mapping    = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPIAIJ",
                                           MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPIAIJ",
                                           MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIAIJ",
                                           MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

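/*
   Editorial example (illustration only, not original text): with three
   processes owning 4, 3 and 5 rows, the MPI_Allgather plus prefix sum in
   MatCreate_MPIAIJ() above produce

       rowners = { 0, 4, 7, 12 }

   so process r owns global rows [rowners[r], rowners[r+1]), and rstart
   and rend are simply the two entries for this rank; cowners is built
   the same way for the column ownership.
*/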
"MatStoreValues_MPIAIJ", 1593 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1594 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1595 "MatRetrieveValues_MPIAIJ", 1596 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1597 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1598 "MatGetDiagonalBlock_MPIAIJ", 1599 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1600 1601 PetscFunctionReturn(0); 1602 } 1603 EXTERN_C_END 1604 1605 #undef __FUNCT__ 1606 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1607 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1608 { 1609 Mat mat; 1610 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1611 int ierr; 1612 1613 PetscFunctionBegin; 1614 *newmat = 0; 1615 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1616 ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr); 1617 a = (Mat_MPIAIJ*)mat->data; 1618 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1619 mat->factor = matin->factor; 1620 mat->assembled = PETSC_TRUE; 1621 mat->insertmode = NOT_SET_VALUES; 1622 mat->preallocated = PETSC_TRUE; 1623 1624 a->rstart = oldmat->rstart; 1625 a->rend = oldmat->rend; 1626 a->cstart = oldmat->cstart; 1627 a->cend = oldmat->cend; 1628 a->size = oldmat->size; 1629 a->rank = oldmat->rank; 1630 a->donotstash = oldmat->donotstash; 1631 a->roworiented = oldmat->roworiented; 1632 a->rowindices = 0; 1633 a->rowvalues = 0; 1634 a->getrowactive = PETSC_FALSE; 1635 1636 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1637 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1638 if (oldmat->colmap) { 1639 #if defined (PETSC_USE_CTABLE) 1640 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1641 #else 1642 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1643 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1644 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1645 #endif 1646 } else a->colmap = 0; 1647 if (oldmat->garray) { 1648 int len; 1649 len = oldmat->B->n; 1650 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1651 PetscLogObjectMemory(mat,len*sizeof(int)); 1652 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1653 } else a->garray = 0; 1654 1655 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1656 PetscLogObjectParent(mat,a->lvec); 1657 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1658 PetscLogObjectParent(mat,a->Mvctx); 1659 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1660 PetscLogObjectParent(mat,a->A); 1661 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1662 PetscLogObjectParent(mat,a->B); 1663 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1664 *newmat = mat; 1665 PetscFunctionReturn(0); 1666 } 1667 1668 #include "petscsys.h" 1669 1670 EXTERN_C_BEGIN 1671 #undef __FUNCT__ 1672 #define __FUNCT__ "MatLoad_MPIAIJ" 1673 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat) 1674 { 1675 Mat A; 1676 PetscScalar *vals,*svals; 1677 MPI_Comm comm = ((PetscObject)viewer)->comm; 1678 MPI_Status status; 1679 int i,nz,ierr,j,rstart,rend,fd; 1680 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1681 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1682 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1683 1684 PetscFunctionBegin; 1685 ierr = 
#include "petscsys.h"

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat         A;
  PetscScalar *vals,*svals;
  MPI_Comm    comm = ((PetscObject)viewer)->comm;
  MPI_Status  status;
  int         i,nz,ierr,j,rstart,rend,fd;
  int         header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int         *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int         tag = ((PetscObject)viewer)->tag,cend,cstart,n;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];
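  /*
     The ownership formula above deals out M rows as evenly as possible: each of
     the first M%size processors gets one extra row.  A worked example, assuming
     M = 8 rows on size = 3 processors:

        rank 0: m = 8/3 + (2 > 0) = 2 + 1 = 3   owns rows 0..2
        rank 1: m = 8/3 + (2 > 1) = 2 + 1 = 3   owns rows 3..5
        rank 2: m = 8/3 + (2 > 2) = 2 + 0 = 2   owns rows 6..7

     The prefix sum over the allgathered local sizes then yields
     rowners[] = {0,3,6,8}, from which rstart/rend follow directly.
  */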
  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine the maximum buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file: received count does not match expected nonzeros");
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' values and ship them off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file: received count does not match expected nonzeros");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
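/*
   MatLoad_MPIAIJ() is normally reached through the type-dispatched MatLoad()
   interface rather than called directly.  A minimal usage sketch, hedged:
   "matrix.dat" is a placeholder file name:

      PetscViewer viewer;
      Mat         A;
      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",PETSC_BINARY_RDONLY,&viewer);CHKERRQ(ierr);
      ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/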
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ
  submatrix on each processor, and then the final result obtained by
  concatenating those local matrices.  Writing it directly would be much
  like MatGetSubMatrices_MPIAIJ().
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
{
  int         ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  int         *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat         *local,M,Mreuse;
  PetscScalar *vwork,*aa;
  MPI_Comm    comm = mat->comm;
  Mat_SeqAIJ  *aij;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ(1,"Local column sizes do not add up to total number of columns");
    }

    /* next, compute all the lengths */
    ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    int ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be the same size/layout as the request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij = (Mat_SeqAIJ*)(Mreuse)->data;
  if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
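/*
   A minimal calling sketch for the reuse pattern above, hedged: isrow/iscol are
   assumed to be index sets the caller has already built.  The first call composes
   the sequential "SubMatrix" onto the result so that later calls can reuse it:

      ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
      ... change the numerical values of mat, keeping the same nonzero pattern ...
      ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
*/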
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates storage for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion of
   the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.
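   A minimal calling sketch under the same assumptions (hedged: the matrix A is
   assumed to have already been created with type MATMPIAIJ and local size 3,
   as on proc0 above; the arrays are proc0's per-row counts):

.vb
     int d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve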
   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
{
  Mat_MPIAIJ *b;
  int        ierr,i;
  PetscTruth flg2;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
  if (!flg2) PetscFunctionReturn(0);
  B->preallocated = PETSC_TRUE;
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
  if (d_nnz) {
    for (i=0; i<B->m; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->m; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
    }
  }
  b = (Mat_MPIAIJ*)B->data;

  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->A);
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->B);

  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   Output Parameter:
.  A - the matrix

   Notes:
   The m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.
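   For consistency with the matrix-vector product y = Ax: the local row size m
   must match the local size of y, and the local column size n must match the
   local size of x.  A minimal sketch of this correspondence (hedged: it merely
   restates the rule above with explicit vector creation):

.vb
     ierr = VecCreateMPI(comm,n,N,&x);CHKERRQ(ierr);
     ierr = VecCreateMPI(comm,m,M,&y);CHKERRQ(ierr);
     ierr = MatCreateMPIAIJ(comm,m,n,M,N,d_nz,PETSC_NULL,o_nz,PETSC_NULL,&A);CHKERRQ(ierr);
     ... set values in A and assemble it ...
     ierr = MatMult(A,x,y);CHKERRQ(ierr);
.ve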
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion of
   the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!


   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.
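   A minimal calling sketch for proc0 of the example above (hedged: the nnz
   arrays are simply the per-row counts worked out immediately above):

.vb
     int d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
     ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve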
   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}
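/*
   MatMPIAIJGetSeqAIJ() exposes the two SeqAIJ pieces of the local storage:
   Ad is the square DIAGONAL block and Ao the OFF-DIAGONAL block, whose column
   indices are stored compressed; colmap (garray) translates a local column
   index of Ao back to the global column number.  A hedged sketch of reading
   one row of the off-diagonal block in global numbering:

      ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
      ierr = MatGetRow(Ao,0,&nz,&cols,&vals);CHKERRQ(ierr);
      for (j=0; j<nz; j++) {
        globalcol = colmap[cols[j]];      <- global column of the j-th entry
      }
      ierr = MatRestoreRow(Ao,0,&nz,&cols,&vals);CHKERRQ(ierr);
*/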
#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_LOCAL) {
    int        *allcolors,*colors,i;
    ISColoring ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    int        *colors,i,*larray;
    ISColoring ocoloring;

    /* set coloring for diagonal portion */
    ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      larray[i] = i + a->cstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"No support for ISColoringType %d",coloring->ctype);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFileMerge"
/*@C
   MatFileMerge - Saves a single large PETSc matrix created by reading in
   sequential matrices from each processor's file and concatenating them
   together.

   Collective on MPI_Comm

   Input Parameters:
+  comm - the communicator across which the individual files are distributed
.  infile - name of input file
-  outfile - name of output file

   Level: advanced

   Notes: The number of columns of the matrix in EACH of the separate files
   MUST be the same.

   The true name of the input file on each processor is obtained by
   appending .rank onto the infile string, where rank runs from 0 up to one
   less than the number of processors.
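   Example usage (hedged: the file names are placeholders); on 3 processors
   this reads mat.0, mat.1 and mat.2 and writes the merged parallel matrix
   to mat.all:

.vb
     ierr = MatFileMerge(PETSC_COMM_WORLD,"mat","mat.all");CHKERRQ(ierr);
.ve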
@*/
int MatFileMerge(MPI_Comm comm,char *infile,char *outfile)
{
  int         ierr,rank,len,m,n,i,rstart,*indx,nnz,I;
  PetscViewer in,out;
  char        *name;
  Mat         A,B;
  PetscScalar *values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscStrlen(infile,&len);CHKERRQ(ierr);
  ierr = PetscMalloc((len+16)*sizeof(char),&name);CHKERRQ(ierr); /* room for ".", the rank digits and the null */
  sprintf(name,"%s.%d",infile,rank);
  ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_RDONLY,&in);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
  ierr = MatLoad(in,MATSEQAIJ,&A);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(in);CHKERRQ(ierr);

  ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
  ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,PETSC_DETERMINE,n,0,0,0,0,&B);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(B,&rstart,0);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow(A,i,&nnz,&indx,&values);CHKERRQ(ierr);
    I    = i + rstart;
    ierr = MatSetValues(B,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatDestroy(A);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscViewerBinaryOpen(comm,outfile,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr);
  ierr = MatView(B,out);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
  ierr = MatDestroy(B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFileSplit"
int MatFileSplit(Mat A,char *outfile)
{
  int         ierr,rank,len,m,N,i,rstart,*indx,nnz;
  PetscViewer out;
  char        *name;
  Mat         B;
  PetscScalar *values;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
  ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
    ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
  ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
  ierr = PetscMalloc((len+16)*sizeof(char),&name);CHKERRQ(ierr); /* room for ".", the rank digits and the null */
  sprintf(name,"%s.%d",outfile,rank);
  ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
  ierr = MatView(B,out);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
  ierr = MatDestroy(B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
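/*
   MatFileSplit() is the rough inverse of MatFileMerge(): each processor writes
   its local rows of the parallel matrix A as a sequential matrix to its own
   file, named by appending .rank onto outfile.  A hedged usage sketch, with
   "mat" as a placeholder base name:

      ierr = MatFileSplit(A,"mat");CHKERRQ(ierr);
*/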