/*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
EXTERN int DisAssemble_MPIAIJ(Mat);
EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatPrintHelp_SeqAIJ(Mat);

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable
  at a slightly higher hash table cost; without it, it is not scalable
  (each processor has an order N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        n = aij->B->n,i,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int         new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if
(!a->singlemalloc) { \ 94 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 95 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 96 } \ 97 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 98 a->singlemalloc = PETSC_TRUE; \ 99 \ 100 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 101 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 102 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 103 a->maxnz += CHUNKSIZE; \ 104 a->reallocs++; \ 105 } \ 106 N = nrow++ - 1; a->nz++; \ 107 /* shift up all the later entries in this row */ \ 108 for (ii=N; ii>=_i; ii--) { \ 109 rp[ii+1] = rp[ii]; \ 110 ap[ii+1] = ap[ii]; \ 111 } \ 112 rp[_i] = col1; \ 113 ap[_i] = value; \ 114 a_noinsert: ; \ 115 ailen[row] = nrow; \ 116 } 117 118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 119 { \ 120 \ 121 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 122 rmax = bimax[row]; nrow = bilen[row]; \ 123 col1 = col - shift; \ 124 \ 125 low = 0; high = nrow; \ 126 while (high-low > 5) { \ 127 t = (low+high)/2; \ 128 if (rp[t] > col) high = t; \ 129 else low = t; \ 130 } \ 131 for (_i=low; _i<high; _i++) { \ 132 if (rp[_i] > col1) break; \ 133 if (rp[_i] == col1) { \ 134 if (addv == ADD_VALUES) ap[_i] += value; \ 135 else ap[_i] = value; \ 136 goto b_noinsert; \ 137 } \ 138 } \ 139 if (nonew == 1) goto b_noinsert; \ 140 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 141 if (nrow >= rmax) { \ 142 /* there is no extra room in row, therefore enlarge */ \ 143 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 144 PetscScalar *new_a; \ 145 \ 146 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 147 \ 148 /* malloc new storage space */ \ 149 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \ 150 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 151 new_j = (int*)(new_a + new_nz); \ 152 new_i = new_j + new_nz; \ 153 \ 154 /* copy over old data into new slots */ \ 155 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 156 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 157 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 158 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 159 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 160 len*sizeof(int));CHKERRQ(ierr); \ 161 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 162 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 163 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 164 /* free up old matrix storage */ \ 165 \ 166 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 167 if (!b->singlemalloc) { \ 168 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 169 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 170 } \ 171 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 172 b->singlemalloc = PETSC_TRUE; \ 173 \ 174 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 175 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 176 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 177 b->maxnz += CHUNKSIZE; \ 178 b->reallocs++; \ 179 } \ 180 N = nrow++ - 1; b->nz++; \ 181 /* shift up all the later entries in this row */ \ 182 for (ii=N; ii>=_i; ii--) { \ 183 rp[ii+1] = rp[ii]; \ 184 ap[ii+1] = ap[ii]; \ 185 } \ 186 rp[_i] = col1; \ 187 ap[_i] = value; \ 188 b_noinsert: ; \ 189 bilen[row] = nrow; \ 190 } 191 192 #undef __FUNCT__ 193 #define __FUNCT__ "MatSetValues_MPIAIJ" 194 int MatSetValues_MPIAIJ(Mat 
mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 195 { 196 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 197 PetscScalar value; 198 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 199 int cstart = aij->cstart,cend = aij->cend,row,col; 200 PetscTruth roworiented = aij->roworiented; 201 202 /* Some Variables required in the macro */ 203 Mat A = aij->A; 204 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 205 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 206 PetscScalar *aa = a->a; 207 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 208 Mat B = aij->B; 209 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 210 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 211 PetscScalar *ba = b->a; 212 213 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 214 int nonew = a->nonew,shift = a->indexshift; 215 PetscScalar *ap; 216 217 PetscFunctionBegin; 218 for (i=0; i<m; i++) { 219 if (im[i] < 0) continue; 220 #if defined(PETSC_USE_BOPT_g) 221 if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 222 #endif 223 if (im[i] >= rstart && im[i] < rend) { 224 row = im[i] - rstart; 225 for (j=0; j<n; j++) { 226 if (in[j] >= cstart && in[j] < cend){ 227 col = in[j] - cstart; 228 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 229 if (ignorezeroentries && value == 0.0) continue; 230 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 231 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 232 } else if (in[j] < 0) continue; 233 #if defined(PETSC_USE_BOPT_g) 234 else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");} 235 #endif 236 else { 237 if (mat->was_assembled) { 238 if (!aij->colmap) { 239 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 240 } 241 #if defined (PETSC_USE_CTABLE) 242 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 243 col--; 244 #else 245 col = aij->colmap[in[j]] - 1; 246 #endif 247 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 248 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 249 col = in[j]; 250 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 251 B = aij->B; 252 b = (Mat_SeqAIJ*)B->data; 253 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 254 ba = b->a; 255 } 256 } else col = in[j]; 257 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 258 if (ignorezeroentries && value == 0.0) continue; 259 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 260 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 261 } 262 } 263 } else { 264 if (!aij->donotstash) { 265 if (roworiented) { 266 if (ignorezeroentries && v[i*n] == 0.0) continue; 267 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 268 } else { 269 if (ignorezeroentries && v[i] == 0.0) continue; 270 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 271 } 272 } 273 } 274 } 275 PetscFunctionReturn(0); 276 } 277 278 #undef __FUNCT__ 279 #define __FUNCT__ "MatGetValues_MPIAIJ" 280 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v) 281 { 282 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 283 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 284 int cstart = aij->cstart,cend = aij->cend,row,col; 285 286 PetscFunctionBegin; 287 for (i=0; i<m; i++) { 288 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 289 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 
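    /* Values are looked up in the two sequential blocks that make up the parallel matrix:
       columns in [cstart,cend) live in the "diagonal" block aij->A (stored with local
       column index idxn[j]-cstart), while all other columns live in the compressed
       "off-diagonal" block aij->B, whose local-to-global column map is aij->garray and
       whose global-to-local map aij->colmap is built lazily by CreateColmap_MPIAIJ_Private(). */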
290 if (idxm[i] >= rstart && idxm[i] < rend) { 291 row = idxm[i] - rstart; 292 for (j=0; j<n; j++) { 293 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 294 if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 295 if (idxn[j] >= cstart && idxn[j] < cend){ 296 col = idxn[j] - cstart; 297 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 298 } else { 299 if (!aij->colmap) { 300 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 301 } 302 #if defined (PETSC_USE_CTABLE) 303 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 304 col --; 305 #else 306 col = aij->colmap[idxn[j]] - 1; 307 #endif 308 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 309 else { 310 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 311 } 312 } 313 } 314 } else { 315 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 316 } 317 } 318 PetscFunctionReturn(0); 319 } 320 321 #undef __FUNCT__ 322 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 323 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 324 { 325 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 326 int ierr,nstash,reallocs; 327 InsertMode addv; 328 329 PetscFunctionBegin; 330 if (aij->donotstash) { 331 PetscFunctionReturn(0); 332 } 333 334 /* make sure all processors are either in INSERTMODE or ADDMODE */ 335 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 336 if (addv == (ADD_VALUES|INSERT_VALUES)) { 337 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 338 } 339 mat->insertmode = addv; /* in case this processor had no cache */ 340 341 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 342 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 343 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 344 PetscFunctionReturn(0); 345 } 346 347 348 #undef __FUNCT__ 349 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 350 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 351 { 352 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 353 int i,j,rstart,ncols,n,ierr,flg; 354 int *row,*col,other_disassembled; 355 PetscScalar *val; 356 InsertMode addv = mat->insertmode; 357 #if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES) 358 PetscTruth flag; 359 #endif 360 361 PetscFunctionBegin; 362 if (!aij->donotstash) { 363 while (1) { 364 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 365 if (!flg) break; 366 367 for (i=0; i<n;) { 368 /* Now identify the consecutive vals belonging to the same row */ 369 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 370 if (j < n) ncols = j-i; 371 else ncols = n-i; 372 /* Now assemble all these values with a single function call */ 373 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 374 i = j; 375 } 376 } 377 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 378 } 379 380 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 381 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 382 383 /* determine if any processor has disassembled, if so we must 384 also disassemble ourselfs, in order that we may reassemble. 
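     A process disassembles (see DisAssemble_MPIAIJ()) when MatSetValues() receives an
     entry whose column is not in the current off-diagonal pattern; disassembly moves the
     off-diagonal block back to global column numbering so that the scatter used by the
     matrix-vector product can be rebuilt consistently on every process.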
*/ 385 /* 386 if nonzero structure of submatrix B cannot change then we know that 387 no processor disassembled thus we can skip this stuff 388 */ 389 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 390 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 391 if (mat->was_assembled && !other_disassembled) { 392 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 393 } 394 } 395 396 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 397 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 398 } 399 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 400 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 401 402 if (aij->rowvalues) { 403 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 404 aij->rowvalues = 0; 405 } 406 #if defined(PETSC_HAVE_SUPERLUDIST) 407 ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr); 408 if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); } 409 #endif 410 411 #if defined(PETSC_HAVE_SPOOLES) 412 ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr); 413 if (flag) { ierr = MatUseSpooles_MPIAIJ(mat);CHKERRQ(ierr); } 414 #endif 415 PetscFunctionReturn(0); 416 } 417 418 #undef __FUNCT__ 419 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 420 int MatZeroEntries_MPIAIJ(Mat A) 421 { 422 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 423 int ierr; 424 425 PetscFunctionBegin; 426 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 427 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 428 PetscFunctionReturn(0); 429 } 430 431 #undef __FUNCT__ 432 #define __FUNCT__ "MatZeroRows_MPIAIJ" 433 int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag) 434 { 435 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 436 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 437 int *nprocs,j,idx,nsends,row; 438 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 439 int *rvalues,tag = A->tag,count,base,slen,n,*source; 440 int *lens,imdex,*lrows,*values,rstart=l->rstart; 441 MPI_Comm comm = A->comm; 442 MPI_Request *send_waits,*recv_waits; 443 MPI_Status recv_status,*send_status; 444 IS istmp; 445 PetscTruth found; 446 447 PetscFunctionBegin; 448 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 449 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 450 451 /* first count number of contributors to each processor */ 452 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 453 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 454 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 455 for (i=0; i<N; i++) { 456 idx = rows[i]; 457 found = PETSC_FALSE; 458 for (j=0; j<size; j++) { 459 if (idx >= owners[j] && idx < owners[j+1]) { 460 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 461 } 462 } 463 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 464 } 465 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 466 467 /* inform other processors of number of messages and max length*/ 468 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 469 470 /* post receives: */ 471 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 472 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 473 for (i=0; i<nrecvs; i++) { 474 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 475 } 476 477 /* do sends: 478 1) starts[i] gives the starting index in svalues for stuff going to 479 the ith processor 480 */ 481 ierr = 
PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
     Zero the required rows.  If the "diagonal block" of the matrix is square
     and the user wishes to set the diagonal, we use separate code so that
     MatSetValues() is not called once for every diagonal entry (each such
     call may allocate new memory, causing many mallocs and slowing things down).
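     As an illustrative sketch only (the names nlocal, rows, and one below are
     placeholders, not part of this file), a typical use is zeroing Dirichlet
     rows while placing 1.0 on the diagonal:

        IS          is;
        PetscScalar one = 1.0;
        ierr = ISCreateGeneral(PETSC_COMM_WORLD,nlocal,rows,&is);CHKERRQ(ierr);
        ierr = MatZeroRows(A,is,&one);CHKERRQ(ierr);
        ierr = ISDestroy(is);CHKERRQ(ierr);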
541 542 Contributed by: Mathew Knepley 543 */ 544 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 545 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 546 if (diag && (l->A->M == l->A->N)) { 547 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 548 } else if (diag) { 549 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 550 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 551 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 552 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 553 } 554 for (i = 0; i < slen; i++) { 555 row = lrows[i] + rstart; 556 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 557 } 558 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 559 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 560 } else { 561 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 562 } 563 ierr = ISDestroy(istmp);CHKERRQ(ierr); 564 ierr = PetscFree(lrows);CHKERRQ(ierr); 565 566 /* wait on sends */ 567 if (nsends) { 568 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 569 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 570 ierr = PetscFree(send_status);CHKERRQ(ierr); 571 } 572 ierr = PetscFree(send_waits);CHKERRQ(ierr); 573 ierr = PetscFree(svalues);CHKERRQ(ierr); 574 575 PetscFunctionReturn(0); 576 } 577 578 #undef __FUNCT__ 579 #define __FUNCT__ "MatMult_MPIAIJ" 580 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 581 { 582 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 583 int ierr,nt; 584 585 PetscFunctionBegin; 586 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 587 if (nt != A->n) { 588 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt); 589 } 590 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 591 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 592 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 593 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 594 PetscFunctionReturn(0); 595 } 596 597 #undef __FUNCT__ 598 #define __FUNCT__ "MatMultAdd_MPIAIJ" 599 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 600 { 601 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 602 int ierr; 603 604 PetscFunctionBegin; 605 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 606 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 607 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 608 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 609 PetscFunctionReturn(0); 610 } 611 612 #undef __FUNCT__ 613 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 614 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 615 { 616 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 617 int ierr; 618 619 PetscFunctionBegin; 620 /* do nondiagonal part */ 621 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 622 /* send it on its way */ 623 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 624 /* do local part */ 625 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 626 /* receive remote parts: note this assumes the values are not actually */ 627 /* inserted in yy until the next line, which is true for my implementation*/ 628 /* but is not perhaps always true. 
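   In effect yy is formed as A_loc^T * xx locally, while the contributions B_loc^T * xx,
   which belong to rows of the transpose owned by other processes, are merged in by the
   ADD_VALUES reverse scatter; finishing the scatter only after the local product lets
   the communication overlap the local computation.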
*/ 629 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 630 PetscFunctionReturn(0); 631 } 632 633 #undef __FUNCT__ 634 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 635 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 636 { 637 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 638 int ierr; 639 640 PetscFunctionBegin; 641 /* do nondiagonal part */ 642 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 643 /* send it on its way */ 644 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 645 /* do local part */ 646 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 647 /* receive remote parts: note this assumes the values are not actually */ 648 /* inserted in yy until the next line, which is true for my implementation*/ 649 /* but is not perhaps always true. */ 650 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 651 PetscFunctionReturn(0); 652 } 653 654 /* 655 This only works correctly for square matrices where the subblock A->A is the 656 diagonal block 657 */ 658 #undef __FUNCT__ 659 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 660 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 661 { 662 int ierr; 663 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 664 665 PetscFunctionBegin; 666 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 667 if (a->rstart != a->cstart || a->rend != a->cend) { 668 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 669 } 670 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 671 PetscFunctionReturn(0); 672 } 673 674 #undef __FUNCT__ 675 #define __FUNCT__ "MatScale_MPIAIJ" 676 int MatScale_MPIAIJ(PetscScalar *aa,Mat A) 677 { 678 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 679 int ierr; 680 681 PetscFunctionBegin; 682 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 683 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 684 PetscFunctionReturn(0); 685 } 686 687 #undef __FUNCT__ 688 #define __FUNCT__ "MatDestroy_MPIAIJ" 689 int MatDestroy_MPIAIJ(Mat mat) 690 { 691 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 692 int ierr; 693 694 PetscFunctionBegin; 695 #if defined(PETSC_USE_LOG) 696 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 697 #endif 698 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 699 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 700 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 701 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 702 #if defined (PETSC_USE_CTABLE) 703 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 704 #else 705 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 706 #endif 707 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 708 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 709 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 710 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 711 ierr = PetscFree(aij);CHKERRQ(ierr); 712 PetscFunctionReturn(0); 713 } 714 715 extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer); 716 extern int MatFactorInfo_Spooles(Mat,PetscViewer); 717 718 #undef __FUNCT__ 719 #define __FUNCT__ "MatView_MPIAIJ_Binary" 720 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 721 { 722 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 723 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 724 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 725 int nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag; 726 int 
          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
  PetscScalar *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (rank == 0) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->M;
    header[2] = mat->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen = mat->m;
    ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* the 0th processor needs as much buffer space as the processor with the most nonzeros */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
column_values[cnt++] = A->a[k]; 816 } 817 for (; j<B->i[i+1]; j++) { 818 column_values[cnt++] = B->a[j]; 819 } 820 } 821 if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 822 823 /* store the column values to the file */ 824 if (rank == 0) { 825 MPI_Status status; 826 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr); 827 for (i=1; i<size; i++) { 828 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 829 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 830 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 831 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr); 832 } 833 } else { 834 ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 835 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 836 } 837 ierr = PetscFree(column_values);CHKERRQ(ierr); 838 PetscFunctionReturn(0); 839 } 840 841 #undef __FUNCT__ 842 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 843 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 844 { 845 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 846 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 847 int ierr,shift = C->indexshift,rank = aij->rank,size = aij->size; 848 PetscTruth isdraw,isascii,flg,isbinary; 849 PetscViewer sviewer; 850 PetscViewerFormat format; 851 852 PetscFunctionBegin; 853 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 854 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 855 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 856 if (isascii) { 857 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 858 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 859 MatInfo info; 860 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 861 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 862 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 863 if (flg) { 864 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 865 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 866 } else { 867 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 868 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 869 } 870 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 871 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 872 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 873 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 874 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 875 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 876 PetscFunctionReturn(0); 877 } else if (format == PETSC_VIEWER_ASCII_INFO) { 878 PetscFunctionReturn(0); 879 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 880 #if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE) 881 ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr); 882 #endif 883 #if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX) 884 ierr = MatFactorInfo_Spooles(mat,viewer);CHKERRQ(ierr); 885 #endif 886 887 
PetscFunctionReturn(0); 888 } 889 } else if (isbinary) { 890 if (size == 1) { 891 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 892 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 893 } else { 894 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 895 } 896 PetscFunctionReturn(0); 897 } else if (isdraw) { 898 PetscDraw draw; 899 PetscTruth isnull; 900 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 901 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 902 } 903 904 if (size == 1) { 905 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 906 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 907 } else { 908 /* assemble the entire matrix onto first processor. */ 909 Mat A; 910 Mat_SeqAIJ *Aloc; 911 int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 912 PetscScalar *a; 913 914 if (!rank) { 915 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 916 } else { 917 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 918 } 919 PetscLogObjectParent(mat,A); 920 921 /* copy over the A part */ 922 Aloc = (Mat_SeqAIJ*)aij->A->data; 923 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 924 row = aij->rstart; 925 for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;} 926 for (i=0; i<m; i++) { 927 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 928 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 929 } 930 aj = Aloc->j; 931 for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;} 932 933 /* copy over the B part */ 934 Aloc = (Mat_SeqAIJ*)aij->B->data; 935 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 936 row = aij->rstart; 937 ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr); 938 ct = cols; 939 for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];} 940 for (i=0; i<m; i++) { 941 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 942 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 943 } 944 ierr = PetscFree(ct);CHKERRQ(ierr); 945 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 946 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 947 /* 948 Everyone has to call to draw the matrix since the graphics waits are 949 synchronized across all processors that share the PetscDraw object 950 */ 951 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 952 if (!rank) { 953 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 954 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 955 } 956 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 957 ierr = MatDestroy(A);CHKERRQ(ierr); 958 } 959 PetscFunctionReturn(0); 960 } 961 962 #undef __FUNCT__ 963 #define __FUNCT__ "MatView_MPIAIJ" 964 int MatView_MPIAIJ(Mat mat,PetscViewer viewer) 965 { 966 int ierr; 967 PetscTruth isascii,isdraw,issocket,isbinary; 968 969 PetscFunctionBegin; 970 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 971 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 972 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 973 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 974 if (isascii || isdraw || isbinary || issocket) { 975 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 976 } else { 977 
SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 978 } 979 PetscFunctionReturn(0); 980 } 981 982 983 984 #undef __FUNCT__ 985 #define __FUNCT__ "MatRelax_MPIAIJ" 986 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 987 { 988 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 989 int ierr; 990 Vec bb1; 991 PetscScalar mone=-1.0; 992 993 PetscFunctionBegin; 994 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 995 996 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 997 998 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 999 if (flag & SOR_ZERO_INITIAL_GUESS) { 1000 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1001 its--; 1002 } 1003 1004 while (its--) { 1005 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1006 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1007 1008 /* update rhs: bb1 = bb - B*x */ 1009 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1010 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1011 1012 /* local sweep */ 1013 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1014 CHKERRQ(ierr); 1015 } 1016 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1017 if (flag & SOR_ZERO_INITIAL_GUESS) { 1018 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1019 its--; 1020 } 1021 while (its--) { 1022 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1023 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1024 1025 /* update rhs: bb1 = bb - B*x */ 1026 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1027 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1028 1029 /* local sweep */ 1030 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1031 CHKERRQ(ierr); 1032 } 1033 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1034 if (flag & SOR_ZERO_INITIAL_GUESS) { 1035 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1036 its--; 1037 } 1038 while (its--) { 1039 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1040 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1041 1042 /* update rhs: bb1 = bb - B*x */ 1043 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1044 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1045 1046 /* local sweep */ 1047 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1048 CHKERRQ(ierr); 1049 } 1050 } else { 1051 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1052 } 1053 1054 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1055 PetscFunctionReturn(0); 1056 } 1057 1058 #undef __FUNCT__ 1059 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1060 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1061 { 1062 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1063 Mat A = mat->A,B = mat->B; 1064 int ierr; 1065 PetscReal isend[5],irecv[5]; 1066 1067 PetscFunctionBegin; 1068 info->block_size = 1.0; 1069 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1070 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = 
info->nz_unneeded; 1071 isend[3] = info->memory; isend[4] = info->mallocs; 1072 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1073 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1074 isend[3] += info->memory; isend[4] += info->mallocs; 1075 if (flag == MAT_LOCAL) { 1076 info->nz_used = isend[0]; 1077 info->nz_allocated = isend[1]; 1078 info->nz_unneeded = isend[2]; 1079 info->memory = isend[3]; 1080 info->mallocs = isend[4]; 1081 } else if (flag == MAT_GLOBAL_MAX) { 1082 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1083 info->nz_used = irecv[0]; 1084 info->nz_allocated = irecv[1]; 1085 info->nz_unneeded = irecv[2]; 1086 info->memory = irecv[3]; 1087 info->mallocs = irecv[4]; 1088 } else if (flag == MAT_GLOBAL_SUM) { 1089 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1090 info->nz_used = irecv[0]; 1091 info->nz_allocated = irecv[1]; 1092 info->nz_unneeded = irecv[2]; 1093 info->memory = irecv[3]; 1094 info->mallocs = irecv[4]; 1095 } 1096 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1097 info->fill_ratio_needed = 0; 1098 info->factor_mallocs = 0; 1099 info->rows_global = (double)matin->M; 1100 info->columns_global = (double)matin->N; 1101 info->rows_local = (double)matin->m; 1102 info->columns_local = (double)matin->N; 1103 1104 PetscFunctionReturn(0); 1105 } 1106 1107 #undef __FUNCT__ 1108 #define __FUNCT__ "MatSetOption_MPIAIJ" 1109 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1110 { 1111 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1112 int ierr; 1113 1114 PetscFunctionBegin; 1115 switch (op) { 1116 case MAT_NO_NEW_NONZERO_LOCATIONS: 1117 case MAT_YES_NEW_NONZERO_LOCATIONS: 1118 case MAT_COLUMNS_UNSORTED: 1119 case MAT_COLUMNS_SORTED: 1120 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1121 case MAT_KEEP_ZEROED_ROWS: 1122 case MAT_NEW_NONZERO_LOCATION_ERR: 1123 case MAT_USE_INODES: 1124 case MAT_DO_NOT_USE_INODES: 1125 case MAT_IGNORE_ZERO_ENTRIES: 1126 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1127 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1128 break; 1129 case MAT_ROW_ORIENTED: 1130 a->roworiented = PETSC_TRUE; 1131 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1132 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1133 break; 1134 case MAT_ROWS_SORTED: 1135 case MAT_ROWS_UNSORTED: 1136 case MAT_YES_NEW_DIAGONALS: 1137 case MAT_USE_SINGLE_PRECISION_SOLVES: 1138 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1139 break; 1140 case MAT_COLUMN_ORIENTED: 1141 a->roworiented = PETSC_FALSE; 1142 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1143 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1144 break; 1145 case MAT_IGNORE_OFF_PROC_ENTRIES: 1146 a->donotstash = PETSC_TRUE; 1147 break; 1148 case MAT_NO_NEW_DIAGONALS: 1149 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1150 default: 1151 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1152 } 1153 PetscFunctionReturn(0); 1154 } 1155 1156 #undef __FUNCT__ 1157 #define __FUNCT__ "MatGetRow_MPIAIJ" 1158 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1159 { 1160 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1161 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1162 int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1163 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1164 int *cmap,*idx_p; 1165 1166 PetscFunctionBegin; 1167 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1168 mat->getrowactive = PETSC_TRUE; 1169 1170 if (!mat->rowvalues && (idx || v)) { 
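    /* MatGetRow() on an MPIAIJ matrix returns the merged row: the local row of aij->A
       (columns shifted by cstart back to global numbering) interleaved with the row of
       aij->B (columns mapped through garray), kept in increasing global column order.
       The merge is written into the work arrays allocated below. */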
1171 /* 1172 allocate enough space to hold information from the longest row. 1173 */ 1174 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1175 int max = 1,tmp; 1176 for (i=0; i<matin->m; i++) { 1177 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1178 if (max < tmp) { max = tmp; } 1179 } 1180 ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1181 mat->rowindices = (int*)(mat->rowvalues + max); 1182 } 1183 1184 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1185 lrow = row - rstart; 1186 1187 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1188 if (!v) {pvA = 0; pvB = 0;} 1189 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1190 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1191 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1192 nztot = nzA + nzB; 1193 1194 cmap = mat->garray; 1195 if (v || idx) { 1196 if (nztot) { 1197 /* Sort by increasing column numbers, assuming A and B already sorted */ 1198 int imark = -1; 1199 if (v) { 1200 *v = v_p = mat->rowvalues; 1201 for (i=0; i<nzB; i++) { 1202 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1203 else break; 1204 } 1205 imark = i; 1206 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1207 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1208 } 1209 if (idx) { 1210 *idx = idx_p = mat->rowindices; 1211 if (imark > -1) { 1212 for (i=0; i<imark; i++) { 1213 idx_p[i] = cmap[cworkB[i]]; 1214 } 1215 } else { 1216 for (i=0; i<nzB; i++) { 1217 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1218 else break; 1219 } 1220 imark = i; 1221 } 1222 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1223 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1224 } 1225 } else { 1226 if (idx) *idx = 0; 1227 if (v) *v = 0; 1228 } 1229 } 1230 *nz = nztot; 1231 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1232 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1233 PetscFunctionReturn(0); 1234 } 1235 1236 #undef __FUNCT__ 1237 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1238 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1239 { 1240 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1241 1242 PetscFunctionBegin; 1243 if (aij->getrowactive == PETSC_FALSE) { 1244 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1245 } 1246 aij->getrowactive = PETSC_FALSE; 1247 PetscFunctionReturn(0); 1248 } 1249 1250 #undef __FUNCT__ 1251 #define __FUNCT__ "MatNorm_MPIAIJ" 1252 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1253 { 1254 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1255 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1256 int ierr,i,j,cstart = aij->cstart,shift = amat->indexshift; 1257 PetscReal sum = 0.0; 1258 PetscScalar *v; 1259 1260 PetscFunctionBegin; 1261 if (aij->size == 1) { 1262 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1263 } else { 1264 if (type == NORM_FROBENIUS) { 1265 v = amat->a; 1266 for (i=0; i<amat->nz; i++) { 1267 #if defined(PETSC_USE_COMPLEX) 1268 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1269 #else 1270 sum += (*v)*(*v); v++; 1271 #endif 1272 } 1273 v = bmat->a; 1274 for (i=0; i<bmat->nz; i++) { 1275 #if defined(PETSC_USE_COMPLEX) 1276 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1277 #else 1278 sum += (*v)*(*v); v++; 1279 #endif 1280 } 1281 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1282 *norm = 
sqrt(*norm); 1283 } else if (type == NORM_1) { /* max column norm */ 1284 PetscReal *tmp,*tmp2; 1285 int *jj,*garray = aij->garray; 1286 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1287 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1288 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1289 *norm = 0.0; 1290 v = amat->a; jj = amat->j; 1291 for (j=0; j<amat->nz; j++) { 1292 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1293 } 1294 v = bmat->a; jj = bmat->j; 1295 for (j=0; j<bmat->nz; j++) { 1296 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1297 } 1298 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1299 for (j=0; j<mat->N; j++) { 1300 if (tmp2[j] > *norm) *norm = tmp2[j]; 1301 } 1302 ierr = PetscFree(tmp);CHKERRQ(ierr); 1303 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1304 } else if (type == NORM_INFINITY) { /* max row norm */ 1305 PetscReal ntemp = 0.0; 1306 for (j=0; j<aij->A->m; j++) { 1307 v = amat->a + amat->i[j] + shift; 1308 sum = 0.0; 1309 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1310 sum += PetscAbsScalar(*v); v++; 1311 } 1312 v = bmat->a + bmat->i[j] + shift; 1313 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1314 sum += PetscAbsScalar(*v); v++; 1315 } 1316 if (sum > ntemp) ntemp = sum; 1317 } 1318 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1319 } else { 1320 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1321 } 1322 } 1323 PetscFunctionReturn(0); 1324 } 1325 1326 #undef __FUNCT__ 1327 #define __FUNCT__ "MatTranspose_MPIAIJ" 1328 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1329 { 1330 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1331 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1332 int ierr,shift = Aloc->indexshift; 1333 int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1334 Mat B; 1335 PetscScalar *array; 1336 1337 PetscFunctionBegin; 1338 if (!matout && M != N) { 1339 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1340 } 1341 1342 ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1343 1344 /* copy over the A part */ 1345 Aloc = (Mat_SeqAIJ*)a->A->data; 1346 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1347 row = a->rstart; 1348 for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;} 1349 for (i=0; i<m; i++) { 1350 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1351 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1352 } 1353 aj = Aloc->j; 1354 for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;} 1355 1356 /* copy over the B part */ 1357 Aloc = (Mat_SeqAIJ*)a->B->data; 1358 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1359 row = a->rstart; 1360 ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr); 1361 ct = cols; 1362 for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];} 1363 for (i=0; i<m; i++) { 1364 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1365 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1366 } 1367 ierr = PetscFree(ct);CHKERRQ(ierr); 1368 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1369 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1370 if (matout) { 1371 *matout = B; 1372 } else { 1373 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1374 } 1375 PetscFunctionReturn(0); 1376 } 1377 1378 #undef __FUNCT__ 1379 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1380 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 
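/*
   Forms mat = diag(ll) * mat * diag(rr).  The left scaling ll is applied to the rows of
   both the diagonal and off-diagonal blocks; the locally owned part of rr scales the
   columns of the diagonal block, while rr is scattered into the ghost ordering
   (aij->lvec) to scale the columns of the off-diagonal block.  The scatter is started
   first so the communication overlaps the local scaling.
*/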
1381 { 1382 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1383 Mat a = aij->A,b = aij->B; 1384 int ierr,s1,s2,s3; 1385 1386 PetscFunctionBegin; 1387 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1388 if (rr) { 1389 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1390 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1391 /* Overlap communication with computation. */ 1392 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1393 } 1394 if (ll) { 1395 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1396 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1397 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1398 } 1399 /* scale the diagonal block */ 1400 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1401 1402 if (rr) { 1403 /* Do a scatter end and then right scale the off-diagonal block */ 1404 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1405 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1406 } 1407 1408 PetscFunctionReturn(0); 1409 } 1410 1411 1412 #undef __FUNCT__ 1413 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1414 int MatPrintHelp_MPIAIJ(Mat A) 1415 { 1416 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1417 int ierr; 1418 1419 PetscFunctionBegin; 1420 if (!a->rank) { 1421 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1422 } 1423 PetscFunctionReturn(0); 1424 } 1425 1426 #undef __FUNCT__ 1427 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1428 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1429 { 1430 PetscFunctionBegin; 1431 *bs = 1; 1432 PetscFunctionReturn(0); 1433 } 1434 #undef __FUNCT__ 1435 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1436 int MatSetUnfactored_MPIAIJ(Mat A) 1437 { 1438 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1439 int ierr; 1440 1441 PetscFunctionBegin; 1442 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1443 PetscFunctionReturn(0); 1444 } 1445 1446 #undef __FUNCT__ 1447 #define __FUNCT__ "MatEqual_MPIAIJ" 1448 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1449 { 1450 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1451 Mat a,b,c,d; 1452 PetscTruth flg; 1453 int ierr; 1454 1455 PetscFunctionBegin; 1456 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1457 if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type"); 1458 a = matA->A; b = matA->B; 1459 c = matB->A; d = matB->B; 1460 1461 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1462 if (flg == PETSC_TRUE) { 1463 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1464 } 1465 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1466 PetscFunctionReturn(0); 1467 } 1468 1469 #undef __FUNCT__ 1470 #define __FUNCT__ "MatCopy_MPIAIJ" 1471 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1472 { 1473 int ierr; 1474 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1475 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1476 PetscTruth flg; 1477 1478 PetscFunctionBegin; 1479 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1480 if (str != SAME_NONZERO_PATTERN || !flg) { 1481 /* because of the column compression in the off-processor part of the matrix a->B, 1482 the number of columns in a->B and b->B may be different, hence we cannot call 1483 the MatCopy() directly on the two parts. 
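       (MatCopy_Basic() copies the matrix generically, row by row through
       MatGetRow()/MatSetValues(), which handles any pair of nonzero patterns
       at the cost of extra work.)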
If need be, we can provide a more 1484 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1485 then copying the submatrices */ 1486 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1487 } else { 1488 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1489 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1490 } 1491 PetscFunctionReturn(0); 1492 } 1493 1494 #undef __FUNCT__ 1495 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1496 int MatSetUpPreallocation_MPIAIJ(Mat A) 1497 { 1498 int ierr; 1499 1500 PetscFunctionBegin; 1501 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1502 PetscFunctionReturn(0); 1503 } 1504 1505 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1506 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int); 1507 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1508 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **); 1509 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *); 1510 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1511 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*); 1512 #endif 1513 1514 #include "petscblaslapack.h" 1515 1516 #undef __FUNCT__ 1517 #define __FUNCT__ "MatAXPY_MPIAIJ" 1518 int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str) 1519 { 1520 int ierr,one; 1521 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1522 Mat_SeqAIJ *x,*y; 1523 1524 PetscFunctionBegin; 1525 if (str == SAME_NONZERO_PATTERN) { 1526 x = (Mat_SeqAIJ *)xx->A->data; 1527 y = (Mat_SeqAIJ *)yy->A->data; 1528 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1529 x = (Mat_SeqAIJ *)xx->B->data; 1530 y = (Mat_SeqAIJ *)yy->B->data; 1531 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1532 } else { 1533 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1534 } 1535 PetscFunctionReturn(0); 1536 } 1537 1538 /* -------------------------------------------------------------------*/ 1539 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1540 MatGetRow_MPIAIJ, 1541 MatRestoreRow_MPIAIJ, 1542 MatMult_MPIAIJ, 1543 MatMultAdd_MPIAIJ, 1544 MatMultTranspose_MPIAIJ, 1545 MatMultTransposeAdd_MPIAIJ, 1546 0, 1547 0, 1548 0, 1549 0, 1550 0, 1551 0, 1552 MatRelax_MPIAIJ, 1553 MatTranspose_MPIAIJ, 1554 MatGetInfo_MPIAIJ, 1555 MatEqual_MPIAIJ, 1556 MatGetDiagonal_MPIAIJ, 1557 MatDiagonalScale_MPIAIJ, 1558 MatNorm_MPIAIJ, 1559 MatAssemblyBegin_MPIAIJ, 1560 MatAssemblyEnd_MPIAIJ, 1561 0, 1562 MatSetOption_MPIAIJ, 1563 MatZeroEntries_MPIAIJ, 1564 MatZeroRows_MPIAIJ, 1565 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1566 MatLUFactorSymbolic_MPIAIJ_TFS, 1567 #else 1568 0, 1569 #endif 1570 0, 1571 0, 1572 0, 1573 MatSetUpPreallocation_MPIAIJ, 1574 0, 1575 0, 1576 0, 1577 0, 1578 MatDuplicate_MPIAIJ, 1579 0, 1580 0, 1581 0, 1582 0, 1583 MatAXPY_MPIAIJ, 1584 MatGetSubMatrices_MPIAIJ, 1585 MatIncreaseOverlap_MPIAIJ, 1586 MatGetValues_MPIAIJ, 1587 MatCopy_MPIAIJ, 1588 MatPrintHelp_MPIAIJ, 1589 MatScale_MPIAIJ, 1590 0, 1591 0, 1592 0, 1593 MatGetBlockSize_MPIAIJ, 1594 0, 1595 0, 1596 0, 1597 0, 1598 MatFDColoringCreate_MPIAIJ, 1599 0, 1600 MatSetUnfactored_MPIAIJ, 1601 0, 1602 0, 1603 MatGetSubMatrix_MPIAIJ, 1604 MatDestroy_MPIAIJ, 1605 MatView_MPIAIJ, 1606 MatGetPetscMaps_Petsc, 1607 0, 1608 0, 1609 0, 1610 0, 1611 0, 1612 0, 1613 0, 1614 0, 1615 MatSetColoring_MPIAIJ, 1616 MatSetValuesAdic_MPIAIJ, 1617 MatSetValuesAdifor_MPIAIJ 1618 }; 1619 1620 /* 
----------------------------------------------------------------------------------------*/ 1621 1622 EXTERN_C_BEGIN 1623 #undef __FUNCT__ 1624 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1625 int MatStoreValues_MPIAIJ(Mat mat) 1626 { 1627 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1628 int ierr; 1629 1630 PetscFunctionBegin; 1631 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1632 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1633 PetscFunctionReturn(0); 1634 } 1635 EXTERN_C_END 1636 1637 EXTERN_C_BEGIN 1638 #undef __FUNCT__ 1639 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1640 int MatRetrieveValues_MPIAIJ(Mat mat) 1641 { 1642 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1643 int ierr; 1644 1645 PetscFunctionBegin; 1646 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1647 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1648 PetscFunctionReturn(0); 1649 } 1650 EXTERN_C_END 1651 1652 #include "petscpc.h" 1653 EXTERN_C_BEGIN 1654 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *); 1655 EXTERN_C_END 1656 1657 EXTERN_C_BEGIN 1658 #undef __FUNCT__ 1659 #define __FUNCT__ "MatCreate_MPIAIJ" 1660 int MatCreate_MPIAIJ(Mat B) 1661 { 1662 Mat_MPIAIJ *b; 1663 int ierr,i,size; 1664 1665 PetscFunctionBegin; 1666 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1667 1668 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1669 B->data = (void*)b; 1670 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1671 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1672 B->factor = 0; 1673 B->assembled = PETSC_FALSE; 1674 B->mapping = 0; 1675 1676 B->insertmode = NOT_SET_VALUES; 1677 b->size = size; 1678 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1679 1680 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1681 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1682 1683 /* the information in the maps duplicates the information computed below, eventually 1684 we should remove the duplicate information that is not contained in the maps */ 1685 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1686 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1687 1688 /* build local table of row and column ownerships */ 1689 ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1690 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1691 b->cowners = b->rowners + b->size + 2; 1692 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1693 b->rowners[0] = 0; 1694 for (i=2; i<=b->size; i++) { 1695 b->rowners[i] += b->rowners[i-1]; 1696 } 1697 b->rstart = b->rowners[b->rank]; 1698 b->rend = b->rowners[b->rank+1]; 1699 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1700 b->cowners[0] = 0; 1701 for (i=2; i<=b->size; i++) { 1702 b->cowners[i] += b->cowners[i-1]; 1703 } 1704 b->cstart = b->cowners[b->rank]; 1705 b->cend = b->cowners[b->rank+1]; 1706 1707 /* build cache for off array entries formed */ 1708 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1709 b->donotstash = PETSC_FALSE; 1710 b->colmap = 0; 1711 b->garray = 0; 1712 b->roworiented = PETSC_TRUE; 1713 1714 /* stuff used for matrix vector multiply */ 1715 b->lvec = PETSC_NULL; 1716 b->Mvctx = PETSC_NULL; 1717 1718 /* stuff for MatGetRow() */ 1719 b->rowindices = 0; 1720 b->rowvalues = 0; 1721 b->getrowactive = PETSC_FALSE; 1722 1723 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1724 
"MatStoreValues_MPIAIJ", 1725 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1726 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1727 "MatRetrieveValues_MPIAIJ", 1728 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1729 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1730 "MatGetDiagonalBlock_MPIAIJ", 1731 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1732 1733 PetscFunctionReturn(0); 1734 } 1735 EXTERN_C_END 1736 1737 #undef __FUNCT__ 1738 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1739 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1740 { 1741 Mat mat; 1742 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1743 int ierr; 1744 1745 PetscFunctionBegin; 1746 *newmat = 0; 1747 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1748 ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr); 1749 a = (Mat_MPIAIJ*)mat->data; 1750 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1751 mat->factor = matin->factor; 1752 mat->assembled = PETSC_TRUE; 1753 mat->insertmode = NOT_SET_VALUES; 1754 mat->preallocated = PETSC_TRUE; 1755 1756 a->rstart = oldmat->rstart; 1757 a->rend = oldmat->rend; 1758 a->cstart = oldmat->cstart; 1759 a->cend = oldmat->cend; 1760 a->size = oldmat->size; 1761 a->rank = oldmat->rank; 1762 a->donotstash = oldmat->donotstash; 1763 a->roworiented = oldmat->roworiented; 1764 a->rowindices = 0; 1765 a->rowvalues = 0; 1766 a->getrowactive = PETSC_FALSE; 1767 1768 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1769 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1770 if (oldmat->colmap) { 1771 #if defined (PETSC_USE_CTABLE) 1772 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1773 #else 1774 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1775 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1776 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1777 #endif 1778 } else a->colmap = 0; 1779 if (oldmat->garray) { 1780 int len; 1781 len = oldmat->B->n; 1782 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1783 PetscLogObjectMemory(mat,len*sizeof(int)); 1784 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1785 } else a->garray = 0; 1786 1787 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1788 PetscLogObjectParent(mat,a->lvec); 1789 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1790 PetscLogObjectParent(mat,a->Mvctx); 1791 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1792 PetscLogObjectParent(mat,a->A); 1793 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1794 PetscLogObjectParent(mat,a->B); 1795 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1796 *newmat = mat; 1797 PetscFunctionReturn(0); 1798 } 1799 1800 #include "petscsys.h" 1801 1802 EXTERN_C_BEGIN 1803 #undef __FUNCT__ 1804 #define __FUNCT__ "MatLoad_MPIAIJ" 1805 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat) 1806 { 1807 Mat A; 1808 PetscScalar *vals,*svals; 1809 MPI_Comm comm = ((PetscObject)viewer)->comm; 1810 MPI_Status status; 1811 int i,nz,ierr,j,rstart,rend,fd; 1812 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1813 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1814 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1815 1816 PetscFunctionBegin; 1817 ierr = 
MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1818 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1819 if (!rank) { 1820 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1821 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1822 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1823 if (header[3] < 0) { 1824 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 1825 } 1826 } 1827 1828 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1829 M = header[1]; N = header[2]; 1830 /* determine ownership of all rows */ 1831 m = M/size + ((M % size) > rank); 1832 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1833 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1834 rowners[0] = 0; 1835 for (i=2; i<=size; i++) { 1836 rowners[i] += rowners[i-1]; 1837 } 1838 rstart = rowners[rank]; 1839 rend = rowners[rank+1]; 1840 1841 /* distribute row lengths to all processors */ 1842 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr); 1843 offlens = ourlens + (rend-rstart); 1844 if (!rank) { 1845 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 1846 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1847 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 1848 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1849 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1850 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1851 } else { 1852 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1853 } 1854 1855 if (!rank) { 1856 /* calculate the number of nonzeros on each processor */ 1857 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 1858 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 1859 for (i=0; i<size; i++) { 1860 for (j=rowners[i]; j< rowners[i+1]; j++) { 1861 procsnz[i] += rowlengths[j]; 1862 } 1863 } 1864 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1865 1866 /* determine max buffer needed and allocate it */ 1867 maxnz = 0; 1868 for (i=0; i<size; i++) { 1869 maxnz = PetscMax(maxnz,procsnz[i]); 1870 } 1871 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 1872 1873 /* read in my part of the matrix column indices */ 1874 nz = procsnz[0]; 1875 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 1876 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1877 1878 /* read in every one elses and ship off */ 1879 for (i=1; i<size; i++) { 1880 nz = procsnz[i]; 1881 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1882 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 1883 } 1884 ierr = PetscFree(cols);CHKERRQ(ierr); 1885 } else { 1886 /* determine buffer space needed for message */ 1887 nz = 0; 1888 for (i=0; i<m; i++) { 1889 nz += ourlens[i]; 1890 } 1891 ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr); 1892 1893 /* receive message of column indices*/ 1894 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 1895 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 1896 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1897 } 1898 1899 /* determine column ownership if matrix is not square */ 1900 if (N != M) { 1901 n = N/size + ((N % size) > rank); 1902 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1903 cstart = cend - n; 1904 } else { 1905 cstart 
= rstart; 1906 cend = rend; 1907 n = cend - cstart; 1908 } 1909 1910 /* loop over local rows, determining number of off diagonal entries */ 1911 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 1912 jj = 0; 1913 for (i=0; i<m; i++) { 1914 for (j=0; j<ourlens[i]; j++) { 1915 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 1916 jj++; 1917 } 1918 } 1919 1920 /* create our matrix */ 1921 for (i=0; i<m; i++) { 1922 ourlens[i] -= offlens[i]; 1923 } 1924 ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1925 A = *newmat; 1926 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 1927 for (i=0; i<m; i++) { 1928 ourlens[i] += offlens[i]; 1929 } 1930 1931 if (!rank) { 1932 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1933 1934 /* read in my part of the matrix numerical values */ 1935 nz = procsnz[0]; 1936 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1937 1938 /* insert into matrix */ 1939 jj = rstart; 1940 smycols = mycols; 1941 svals = vals; 1942 for (i=0; i<m; i++) { 1943 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1944 smycols += ourlens[i]; 1945 svals += ourlens[i]; 1946 jj++; 1947 } 1948 1949 /* read in other processors and ship out */ 1950 for (i=1; i<size; i++) { 1951 nz = procsnz[i]; 1952 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1953 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 1954 } 1955 ierr = PetscFree(procsnz);CHKERRQ(ierr); 1956 } else { 1957 /* receive numeric values */ 1958 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1959 1960 /* receive message of values*/ 1961 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 1962 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1963 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1964 1965 /* insert into matrix */ 1966 jj = rstart; 1967 smycols = mycols; 1968 svals = vals; 1969 for (i=0; i<m; i++) { 1970 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1971 smycols += ourlens[i]; 1972 svals += ourlens[i]; 1973 jj++; 1974 } 1975 } 1976 ierr = PetscFree(ourlens);CHKERRQ(ierr); 1977 ierr = PetscFree(vals);CHKERRQ(ierr); 1978 ierr = PetscFree(mycols);CHKERRQ(ierr); 1979 ierr = PetscFree(rowners);CHKERRQ(ierr); 1980 1981 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1982 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1983 PetscFunctionReturn(0); 1984 } 1985 EXTERN_C_END 1986 1987 #undef __FUNCT__ 1988 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 1989 /* 1990 Not great since it makes two copies of the submatrix, first an SeqAIJ 1991 in local and then by concatenating the local matrices the end result. 
1992 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 1993 */ 1994 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 1995 { 1996 int ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j; 1997 int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 1998 Mat *local,M,Mreuse; 1999 PetscScalar *vwork,*aa; 2000 MPI_Comm comm = mat->comm; 2001 Mat_SeqAIJ *aij; 2002 2003 2004 PetscFunctionBegin; 2005 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2006 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2007 2008 if (call == MAT_REUSE_MATRIX) { 2009 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2010 if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse"); 2011 local = &Mreuse; 2012 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2013 } else { 2014 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2015 Mreuse = *local; 2016 ierr = PetscFree(local);CHKERRQ(ierr); 2017 } 2018 2019 /* 2020 m - number of local rows 2021 n - number of columns (same on all processors) 2022 rstart - first row in new global matrix generated 2023 */ 2024 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2025 if (call == MAT_INITIAL_MATRIX) { 2026 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2027 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix"); 2028 ii = aij->i; 2029 jj = aij->j; 2030 2031 /* 2032 Determine the number of non-zeros in the diagonal and off-diagonal 2033 portions of the matrix in order to do correct preallocation 2034 */ 2035 2036 /* first get start and end of "diagonal" columns */ 2037 if (csize == PETSC_DECIDE) { 2038 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2039 if (mglobal == n) { /* square matrix */ 2040 nlocal = m; 2041 } else { 2042 nlocal = n/size + ((n % size) > rank); 2043 } 2044 } else { 2045 nlocal = csize; 2046 } 2047 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2048 rstart = rend - nlocal; 2049 if (rank == size - 1 && rend != n) { 2050 SETERRQ(1,"Local column sizes do not add up to total number of columns"); 2051 } 2052 2053 /* next, compute all the lengths */ 2054 ierr = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2055 olens = dlens + m; 2056 for (i=0; i<m; i++) { 2057 jend = ii[i+1] - ii[i]; 2058 olen = 0; 2059 dlen = 0; 2060 for (j=0; j<jend; j++) { 2061 if (*jj < rstart || *jj >= rend) olen++; 2062 else dlen++; 2063 jj++; 2064 } 2065 olens[i] = olen; 2066 dlens[i] = dlen; 2067 } 2068 ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 2069 ierr = PetscFree(dlens);CHKERRQ(ierr); 2070 } else { 2071 int ml,nl; 2072 2073 M = *newmat; 2074 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2075 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2076 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2077 /* 2078 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2079 rather than the slower MatSetValues(). 
2080 */ 2081 M->was_assembled = PETSC_TRUE; 2082 M->assembled = PETSC_FALSE; 2083 } 2084 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2085 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2086 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix"); 2087 ii = aij->i; 2088 jj = aij->j; 2089 aa = aij->a; 2090 for (i=0; i<m; i++) { 2091 row = rstart + i; 2092 nz = ii[i+1] - ii[i]; 2093 cwork = jj; jj += nz; 2094 vwork = aa; aa += nz; 2095 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2096 } 2097 2098 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2099 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2100 *newmat = M; 2101 2102 /* save submatrix used in processor for next request */ 2103 if (call == MAT_INITIAL_MATRIX) { 2104 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2105 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2106 } 2107 2108 PetscFunctionReturn(0); 2109 } 2110 2111 #undef __FUNCT__ 2112 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2113 /*@C 2114 MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format 2115 (the default parallel PETSc format). For good matrix assembly performance 2116 the user should preallocate the matrix storage by setting the parameters 2117 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2118 performance can be increased by more than a factor of 50. 2119 2120 Collective on MPI_Comm 2121 2122 Input Parameters: 2123 + A - the matrix 2124 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2125 (same value is used for all local rows) 2126 . d_nnz - array containing the number of nonzeros in the various rows of the 2127 DIAGONAL portion of the local submatrix (possibly different for each row) 2128 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2129 The size of this array is equal to the number of local rows, i.e 'm'. 2130 You must leave room for the diagonal entry even if it is zero. 2131 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2132 submatrix (same value is used for all local rows). 2133 - o_nnz - array containing the number of nonzeros in the various rows of the 2134 OFF-DIAGONAL portion of the local submatrix (possibly different for 2135 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2136 structure. The size of this array is equal to the number 2137 of local rows, i.e 'm'. 2138 2139 The AIJ format (also called the Yale sparse matrix format or 2140 compressed row storage), is fully compatible with standard Fortran 77 2141 storage. That is, the stored row and column indices can begin at 2142 either one (as in Fortran) or zero. See the users manual for details. 2143 2144 The user MUST specify either the local or global matrix dimensions 2145 (possibly both). 2146 2147 The parallel matrix is partitioned such that the first m0 rows belong to 2148 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2149 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2150 2151 The DIAGONAL portion of the local submatrix of a processor can be defined 2152 as the submatrix which is obtained by extraction the part corresponding 2153 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2154 first row that belongs to the processor, and r2 is the last row belonging 2155 to the this processor. This is a square mxm matrix. 
The remaining portion 2156 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 2157 2158 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2159 2160 By default, this format uses inodes (identical nodes) when possible. 2161 We search for consecutive rows with the same nonzero structure, thereby 2162 reusing matrix information to achieve increased efficiency. 2163 2164 Options Database Keys: 2165 + -mat_aij_no_inode - Do not use inodes 2166 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2167 - -mat_aij_oneindex - Internally use indexing starting at 1 2168 rather than 0. Note that when calling MatSetValues(), 2169 the user still MUST index entries starting at 0! 2170 2171 Example usage: 2172 2173 Consider the following 8x8 matrix with 34 non-zero values, that is 2174 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2175 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 2176 as follows: 2177 2178 .vb 2179 1 2 0 | 0 3 0 | 0 4 2180 Proc0 0 5 6 | 7 0 0 | 8 0 2181 9 0 10 | 11 0 0 | 12 0 2182 ------------------------------------- 2183 13 0 14 | 15 16 17 | 0 0 2184 Proc1 0 18 0 | 19 20 21 | 0 0 2185 0 0 0 | 22 23 0 | 24 0 2186 ------------------------------------- 2187 Proc2 25 26 27 | 0 0 28 | 29 0 2188 30 0 0 | 31 32 33 | 0 34 2189 .ve 2190 2191 This can be represented as a collection of submatrices as: 2192 2193 .vb 2194 A B C 2195 D E F 2196 G H I 2197 .ve 2198 2199 The submatrices A,B,C are owned by proc0, D,E,F are 2200 owned by proc1, and G,H,I are owned by proc2. 2201 2202 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2203 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2204 The 'M','N' parameters are 8,8, and have the same values on all procs. 2205 2206 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2207 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2208 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2209 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 2210 part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ 2211 matrix and [DF] as another SeqAIJ matrix. 2212 2213 When the d_nz, o_nz parameters are specified, d_nz storage elements are 2214 allocated for every row of the local diagonal submatrix, and o_nz 2215 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2216 One way to choose d_nz and o_nz is to use the maximum number of nonzeros over 2217 the local rows of the DIAGONAL and OFF-DIAGONAL submatrices respectively. 2218 In this case, the values of d_nz,o_nz are: 2219 .vb 2220 proc0 : d_nz = 2, o_nz = 2 2221 proc1 : d_nz = 3, o_nz = 2 2222 proc2 : d_nz = 1, o_nz = 4 2223 .ve 2224 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2225 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 2226 for proc2, i.e. we are using 12+15+10=37 storage locations to store 2227 34 values. 2228 2229 When the d_nnz, o_nnz parameters are specified, the storage is specified 2230 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 2231 In the above case the values for d_nnz,o_nnz are: 2232 .vb 2233 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2234 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2235 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2236 .ve 2237 Here the space allocated is the sum of all of the above values, i.e. 34, and 2238 hence the pre-allocation is perfect.
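   For the layout above, one possible call sequence on proc0 is sketched below
   (each process passes its own local sizes and nnz arrays; the variable names
   comm and A are illustrative and error checking is omitted):
.vb
   int d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};              /* per-row counts for proc0 */
   ierr = MatCreate(comm,3,3,8,8,&A);                    /* m=3,n=3,M=8,N=8 on proc0 */
   ierr = MatSetType(A,MATMPIAIJ);
   ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);  /* d_nz,o_nz ignored when the arrays are given */
.ve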
2239 2240 Level: intermediate 2241 2242 .keywords: matrix, aij, compressed row, sparse, parallel 2243 2244 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2245 @*/ 2246 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 2247 { 2248 Mat_MPIAIJ *b; 2249 int ierr,i; 2250 PetscTruth flg2; 2251 2252 PetscFunctionBegin; 2253 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr); 2254 if (!flg2) PetscFunctionReturn(0); 2255 B->preallocated = PETSC_TRUE; 2256 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2257 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2258 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 2259 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 2260 if (d_nnz) { 2261 for (i=0; i<B->m; i++) { 2262 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 2263 } 2264 } 2265 if (o_nnz) { 2266 for (i=0; i<B->m; i++) { 2267 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 2268 } 2269 } 2270 b = (Mat_MPIAIJ*)B->data; 2271 2272 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2273 PetscLogObjectParent(B,b->A); 2274 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2275 PetscLogObjectParent(B,b->B); 2276 2277 PetscFunctionReturn(0); 2278 } 2279 2280 #undef __FUNCT__ 2281 #define __FUNCT__ "MatCreateMPIAIJ" 2282 /*@C 2283 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2284 (the default parallel PETSc format). For good matrix assembly performance 2285 the user should preallocate the matrix storage by setting the parameters 2286 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2287 performance can be increased by more than a factor of 50. 2288 2289 Collective on MPI_Comm 2290 2291 Input Parameters: 2292 + comm - MPI communicator 2293 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2294 This value should be the same as the local size used in creating the 2295 y vector for the matrix-vector product y = Ax. 2296 . n - This value should be the same as the local size used in creating the 2297 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2298 calculated if N is given) For square matrices n is almost always m. 2299 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2300 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2301 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2302 (same value is used for all local rows) 2303 . d_nnz - array containing the number of nonzeros in the various rows of the 2304 DIAGONAL portion of the local submatrix (possibly different for each row) 2305 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2306 The size of this array is equal to the number of local rows, i.e 'm'. 2307 You must leave room for the diagonal entry even if it is zero. 2308 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2309 submatrix (same value is used for all local rows). 2310 - o_nnz - array containing the number of nonzeros in the various rows of the 2311 OFF-DIAGONAL portion of the local submatrix (possibly different for 2312 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2313 structure. 
The size of this array is equal to the number 2314 of local rows, i.e. 'm'. 2315 2316 Output Parameter: 2317 . A - the matrix 2318 2319 Notes: 2320 The m,n,M,N parameters specify the size of the matrix and its partitioning across 2321 processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2322 storage requirements for this matrix. 2323 2324 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2325 processor then it must be used on all processors that share the object for 2326 that argument. 2327 2328 The AIJ format (also called the Yale sparse matrix format or 2329 compressed row storage), is fully compatible with standard Fortran 77 2330 storage. That is, the stored row and column indices can begin at 2331 either one (as in Fortran) or zero. See the users manual for details. 2332 2333 The user MUST specify either the local or global matrix dimensions 2334 (possibly both). 2335 2336 The parallel matrix is partitioned such that the first m0 rows belong to 2337 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2338 to process 2, etc., where m0,m1,m2,... are the values of the input parameter 'm'. 2339 2340 The DIAGONAL portion of the local submatrix of a processor can be defined 2341 as the submatrix which is obtained by extracting the part corresponding 2342 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2343 first row that belongs to the processor, and r2 is the last row belonging 2344 to this processor. This is a square mxm matrix. The remaining portion 2345 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 2346 2347 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2348 2349 By default, this format uses inodes (identical nodes) when possible. 2350 We search for consecutive rows with the same nonzero structure, thereby 2351 reusing matrix information to achieve increased efficiency. 2352 2353 Options Database Keys: 2354 + -mat_aij_no_inode - Do not use inodes 2355 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2356 - -mat_aij_oneindex - Internally use indexing starting at 1 2357 rather than 0. Note that when calling MatSetValues(), 2358 the user still MUST index entries starting at 0! 2359 2360 2361 Example usage: 2362 2363 Consider the following 8x8 matrix with 34 non-zero values, that is 2364 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 2365 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 2366 as follows: 2367 2368 .vb 2369 1 2 0 | 0 3 0 | 0 4 2370 Proc0 0 5 6 | 7 0 0 | 8 0 2371 9 0 10 | 11 0 0 | 12 0 2372 ------------------------------------- 2373 13 0 14 | 15 16 17 | 0 0 2374 Proc1 0 18 0 | 19 20 21 | 0 0 2375 0 0 0 | 22 23 0 | 24 0 2376 ------------------------------------- 2377 Proc2 25 26 27 | 0 0 28 | 29 0 2378 30 0 0 | 31 32 33 | 0 34 2379 .ve 2380 2381 This can be represented as a collection of submatrices as: 2382 2383 .vb 2384 A B C 2385 D E F 2386 G H I 2387 .ve 2388 2389 The submatrices A,B,C are owned by proc0, D,E,F are 2390 owned by proc1, and G,H,I are owned by proc2. 2391 2392 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2393 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2394 The 'M','N' parameters are 8,8, and have the same values on all procs. 2395 2396 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2397 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2398 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
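   For this layout, proc0 could create the matrix with a call like the sketch
   below, with proc1 and proc2 passing their own local sizes and preallocation
   values (these values are discussed below; error checking is omitted and the
   names comm and A are illustrative):
.vb
   ierr = MatCreateMPIAIJ(comm,3,3,8,8,2,PETSC_NULL,2,PETSC_NULL,&A);
.ve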
2399 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2400 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2401 matrix, ans [DF] as another SeqAIJ matrix. 2402 2403 When d_nz, o_nz parameters are specified, d_nz storage elements are 2404 allocated for every row of the local diagonal submatrix, and o_nz 2405 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2406 One way to choose d_nz and o_nz is to use the max nonzerors per local 2407 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2408 In this case, the values of d_nz,o_nz are: 2409 .vb 2410 proc0 : dnz = 2, o_nz = 2 2411 proc1 : dnz = 3, o_nz = 2 2412 proc2 : dnz = 1, o_nz = 4 2413 .ve 2414 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2415 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2416 for proc3. i.e we are using 12+15+10=37 storage locations to store 2417 34 values. 2418 2419 When d_nnz, o_nnz parameters are specified, the storage is specified 2420 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2421 In the above case the values for d_nnz,o_nnz are: 2422 .vb 2423 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2424 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2425 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2426 .ve 2427 Here the space allocated is sum of all the above values i.e 34, and 2428 hence pre-allocation is perfect. 2429 2430 Level: intermediate 2431 2432 .keywords: matrix, aij, compressed row, sparse, parallel 2433 2434 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2435 @*/ 2436 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 2437 { 2438 int ierr,size; 2439 2440 PetscFunctionBegin; 2441 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2442 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2443 if (size > 1) { 2444 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2445 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2446 } else { 2447 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2448 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2449 } 2450 PetscFunctionReturn(0); 2451 } 2452 2453 #undef __FUNCT__ 2454 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2455 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap) 2456 { 2457 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2458 PetscFunctionBegin; 2459 *Ad = a->A; 2460 *Ao = a->B; 2461 *colmap = a->garray; 2462 PetscFunctionReturn(0); 2463 } 2464 2465 #undef __FUNCT__ 2466 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2467 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2468 { 2469 int ierr; 2470 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2471 2472 PetscFunctionBegin; 2473 if (coloring->ctype == IS_COLORING_LOCAL) { 2474 int *allcolors,*colors,i; 2475 ISColoring ocoloring; 2476 2477 /* set coloring for diagonal portion */ 2478 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2479 2480 /* set coloring for off-diagonal portion */ 2481 ierr = ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2482 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr); 2483 for (i=0; i<a->B->n; i++) { 2484 colors[i] = allcolors[a->garray[i]]; 2485 } 2486 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2487 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2488 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2489 ierr = 
ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2490 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2491 int *colors,i,*larray; 2492 ISColoring ocoloring; 2493 2494 /* set coloring for diagonal portion */ 2495 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2496 for (i=0; i<a->A->n; i++) { 2497 larray[i] = i + a->cstart; 2498 } 2499 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2500 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&colors);CHKERRQ(ierr); 2501 for (i=0; i<a->A->n; i++) { 2502 colors[i] = coloring->colors[larray[i]]; 2503 } 2504 ierr = PetscFree(larray);CHKERRQ(ierr); 2505 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2506 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2507 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2508 2509 /* set coloring for off-diagonal portion */ 2510 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2511 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2512 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr); 2513 for (i=0; i<a->B->n; i++) { 2514 colors[i] = coloring->colors[larray[i]]; 2515 } 2516 ierr = PetscFree(larray);CHKERRQ(ierr); 2517 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2518 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2519 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2520 } else { 2521 SETERRQ1(1,"No support ISColoringType %d",coloring->ctype); 2522 } 2523 2524 PetscFunctionReturn(0); 2525 } 2526 2527 #undef __FUNCT__ 2528 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2529 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2530 { 2531 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2532 int ierr; 2533 2534 PetscFunctionBegin; 2535 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2536 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2537 PetscFunctionReturn(0); 2538 } 2539 2540 #undef __FUNCT__ 2541 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2542 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues) 2543 { 2544 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2545 int ierr; 2546 2547 PetscFunctionBegin; 2548 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2549 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2550 PetscFunctionReturn(0); 2551 } 2552 2553 #undef __FUNCT__ 2554 #define __FUNCT__ "MatMerge" 2555 /*@C 2556 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2557 matrices from each processor 2558 2559 Collective on MPI_Comm 2560 2561 Input Parameters: 2562 + comm - the communicators the parallel matrix will live on 2563 - inmat - the input sequential matrices 2564 2565 Output Parameter: 2566 . outmat - the parallel matrix generated 2567 2568 Level: advanced 2569 2570 Notes: The number of columns of the matrix in EACH of the seperate files 2571 MUST be the same. 
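   A typical use is sketched below (the names mlocal, N, seq and parallel are
   illustrative and error checking is omitted): each process builds a sequential
   matrix containing its rows of the final matrix, with the full global number of
   columns, and then calls MatMerge() to concatenate the pieces by rows. Note that
   MatMerge() destroys the input matrix once the parallel matrix has been assembled.
.vb
   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,mlocal,N,0,PETSC_NULL,&seq);
   /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() on seq ... */
   ierr = MatMerge(comm,seq,&parallel);
.ve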
2572 2573 @*/ 2574 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat) 2575 { 2576 int ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz; 2577 PetscScalar *values; 2578 PetscMap columnmap,rowmap; 2579 2580 PetscFunctionBegin; 2581 2582 ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr); 2583 2584 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2585 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2586 ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr); 2587 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2588 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2589 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2590 2591 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2592 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2593 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2594 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2595 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2596 2597 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2598 for (i=0;i<m;i++) { 2599 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2600 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2601 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2602 } 2603 ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr); 2604 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2605 2606 for (i=0;i<m;i++) { 2607 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2608 I = i + rstart; 2609 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2610 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2611 } 2612 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2613 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2614 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2615 2616 PetscFunctionReturn(0); 2617 } 2618 2619 #undef __FUNCT__ 2620 #define __FUNCT__ "MatFileSplit" 2621 int MatFileSplit(Mat A,char *outfile) 2622 { 2623 int ierr,rank,len,m,N,i,rstart,*indx,nnz; 2624 PetscViewer out; 2625 char *name; 2626 Mat B; 2627 PetscScalar *values; 2628 2629 PetscFunctionBegin; 2630 2631 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2632 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2633 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr); 2634 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2635 for (i=0;i<m;i++) { 2636 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2637 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2638 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2639 } 2640 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2641 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2642 2643 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2644 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2645 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2646 sprintf(name,"%s.%d",outfile,rank); 2647 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr); 2648 ierr = PetscFree(name); 2649 ierr = MatView(B,out);CHKERRQ(ierr); 2650 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2651 ierr = MatDestroy(B);CHKERRQ(ierr); 2652 PetscFunctionReturn(0); 2653 } 2654
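/*
   MatFileSplit() and MatMerge() are complementary: MatFileSplit() writes each
   process's rows of a parallel matrix to its own sequential binary file named
   "outfile.<rank>", while MatMerge() assembles a parallel matrix from one
   sequential matrix per process. A possible round trip is sketched below; the
   file-name handling and error checking are abbreviated, and the viewer mode
   PETSC_BINARY_RDONLY is assumed to be the read counterpart of the
   PETSC_BINARY_CREATE mode used above.

     ierr = MatFileSplit(A,"splitfile");CHKERRQ(ierr);
     ...
     sprintf(name,"splitfile.%d",rank);
     ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_RDONLY,&fd);CHKERRQ(ierr);
     ierr = MatLoad(fd,MATSEQAIJ,&seq);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(fd);CHKERRQ(ierr);
     ierr = MatMerge(A->comm,seq,&Anew);CHKERRQ(ierr);
*/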