1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/ 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/spops.h" 6 7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat); 8 EXTERN int DisAssemble_MPIAIJ(Mat); 9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode); 10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**); 11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**); 12 EXTERN int MatPrintHelp_SeqAIJ(Mat); 13 14 /* 15 Local utility routine that creates a mapping from the global column 16 number to the local number in the off-diagonal part of the local 17 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 18 a slightly higher hash table cost; without it it is not scalable (each processor 19 has an order N integer array but is fast to acess. 20 */ 21 #undef __FUNCT__ 22 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 23 int CreateColmap_MPIAIJ_Private(Mat mat) 24 { 25 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 26 int n = aij->B->n,i,ierr; 27 28 PetscFunctionBegin; 29 #if defined (PETSC_USE_CTABLE) 30 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 31 for (i=0; i<n; i++){ 32 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 33 } 34 #else 35 ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr); 36 PetscLogObjectMemory(mat,mat->N*sizeof(int)); 37 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr); 38 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 39 #endif 40 PetscFunctionReturn(0); 41 } 42 43 #define CHUNKSIZE 15 44 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 45 { \ 46 \ 47 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 48 rmax = aimax[row]; nrow = ailen[row]; \ 49 col1 = col - shift; \ 50 \ 51 low = 0; high = nrow; \ 52 while (high-low > 5) { \ 53 t = (low+high)/2; \ 54 if (rp[t] > col) high = t; \ 55 else low = t; \ 56 } \ 57 for (_i=low; _i<high; _i++) { \ 
58 if (rp[_i] > col1) break; \ 59 if (rp[_i] == col1) { \ 60 if (addv == ADD_VALUES) ap[_i] += value; \ 61 else ap[_i] = value; \ 62 goto a_noinsert; \ 63 } \ 64 } \ 65 if (nonew == 1) goto a_noinsert; \ 66 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 67 if (nrow >= rmax) { \ 68 /* there is no extra room in row, therefore enlarge */ \ 69 int new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \ 70 PetscScalar *new_a; \ 71 \ 72 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 73 \ 74 /* malloc new storage space */ \ 75 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \ 76 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 77 new_j = (int*)(new_a + new_nz); \ 78 new_i = new_j + new_nz; \ 79 \ 80 /* copy over old data into new slots */ \ 81 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \ 82 for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 83 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 84 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 85 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 86 len*sizeof(int));CHKERRQ(ierr); \ 87 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 88 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 89 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 90 /* free up old matrix storage */ \ 91 \ 92 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 93 if (!a->singlemalloc) { \ 94 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 95 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 96 } \ 97 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 98 a->singlemalloc = PETSC_TRUE; \ 99 \ 100 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 101 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 102 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 103 a->maxnz += CHUNKSIZE; \ 104 a->reallocs++; \ 105 
} \ 106 N = nrow++ - 1; a->nz++; \ 107 /* shift up all the later entries in this row */ \ 108 for (ii=N; ii>=_i; ii--) { \ 109 rp[ii+1] = rp[ii]; \ 110 ap[ii+1] = ap[ii]; \ 111 } \ 112 rp[_i] = col1; \ 113 ap[_i] = value; \ 114 a_noinsert: ; \ 115 ailen[row] = nrow; \ 116 } 117 118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 119 { \ 120 \ 121 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 122 rmax = bimax[row]; nrow = bilen[row]; \ 123 col1 = col - shift; \ 124 \ 125 low = 0; high = nrow; \ 126 while (high-low > 5) { \ 127 t = (low+high)/2; \ 128 if (rp[t] > col) high = t; \ 129 else low = t; \ 130 } \ 131 for (_i=low; _i<high; _i++) { \ 132 if (rp[_i] > col1) break; \ 133 if (rp[_i] == col1) { \ 134 if (addv == ADD_VALUES) ap[_i] += value; \ 135 else ap[_i] = value; \ 136 goto b_noinsert; \ 137 } \ 138 } \ 139 if (nonew == 1) goto b_noinsert; \ 140 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 141 if (nrow >= rmax) { \ 142 /* there is no extra room in row, therefore enlarge */ \ 143 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 144 PetscScalar *new_a; \ 145 \ 146 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 147 \ 148 /* malloc new storage space */ \ 149 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \ 150 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 151 new_j = (int*)(new_a + new_nz); \ 152 new_i = new_j + new_nz; \ 153 \ 154 /* copy over old data into new slots */ \ 155 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 156 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 157 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 158 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 159 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 160 len*sizeof(int));CHKERRQ(ierr); \ 161 ierr = 
PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 162 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 163 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 164 /* free up old matrix storage */ \ 165 \ 166 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 167 if (!b->singlemalloc) { \ 168 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 169 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 170 } \ 171 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 172 b->singlemalloc = PETSC_TRUE; \ 173 \ 174 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 175 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 176 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 177 b->maxnz += CHUNKSIZE; \ 178 b->reallocs++; \ 179 } \ 180 N = nrow++ - 1; b->nz++; \ 181 /* shift up all the later entries in this row */ \ 182 for (ii=N; ii>=_i; ii--) { \ 183 rp[ii+1] = rp[ii]; \ 184 ap[ii+1] = ap[ii]; \ 185 } \ 186 rp[_i] = col1; \ 187 ap[_i] = value; \ 188 b_noinsert: ; \ 189 bilen[row] = nrow; \ 190 } 191 192 #undef __FUNCT__ 193 #define __FUNCT__ "MatSetValues_MPIAIJ" 194 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 195 { 196 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 197 PetscScalar value; 198 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 199 int cstart = aij->cstart,cend = aij->cend,row,col; 200 PetscTruth roworiented = aij->roworiented; 201 202 /* Some Variables required in the macro */ 203 Mat A = aij->A; 204 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 205 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 206 PetscScalar *aa = a->a; 207 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 208 Mat B = aij->B; 209 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 210 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 211 PetscScalar *ba = b->a; 212 213 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 214 int 
nonew = a->nonew,shift = a->indexshift;
  PetscScalar *ap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;               /* negative rows are silently ignored */
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) { /* row owned by this processor */
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart && in[j] < cend){
          /* column falls in the diagonal block */
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;    /* negative columns are silently ignored */
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
#endif
        else {
          /* column falls in the off-diagonal block: translate global column
             to B's local column via the colmap built lazily below */
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;                         /* colmap stores local index+1; 0 -> not found */
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              /* column not in B's current nonzero structure: revert the matrix
                 to unassembled (global-column) form so it can be inserted */
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba    = b->a;
            }
          } else col = in[j];              /* not yet assembled: B uses global columns */
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* off-processor row: stash the whole row for assembly-time communication */
      if (!aij->donotstash) {
        if (roworiented) {
          /* NOTE(review): only the FIRST value of the row is tested here, and
             the entire row is skipped when it is zero — verify this matches
             the intended ignorezeroentries semantics for stashed rows */
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
/*
   Retrieves an m-by-n block of values; only locally owned rows are
   supported.  Off-diagonal columns are looked up through the colmap;
   entries outside the stored nonzero pattern are returned as 0.0.
*/
int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int        cstart = aij->cstart,cend = aij->cend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          /* diagonal block */
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal block; build the colmap on first use */
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col --;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* column absent from B's nonzero pattern -> value is zero */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
/*
   Begins assembly: verifies all processors use the same InsertMode and
   starts scattering stashed off-processor values to their owners.
*/
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  int         ierr,nstash,reallocs;
  InsertMode  addv;

  PetscFunctionBegin;
  /* nothing was stashed, so there is nothing to communicate */
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
/*
   Completes assembly: drains the stash (inserting received values with
   MatSetValues_MPIAIJ), assembles the two sequential blocks, and handles
   the collective disassemble/reassemble of the off-diagonal block.
*/
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  int         i,j,rstart,ncols,n,ierr,flg;
  int         *row,*col,other_disassembled;
  PetscScalar *val;
  InsertMode  addv = mat->insertmode;
#if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES)
  PetscTruth  flag;
#endif

  PetscFunctionBegin;
  if (!aij->donotstash) {
    /* receive one message at a time until the stash scatter is exhausted */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    /* MPI_PROD: result is 0 (false) iff ANY processor has was_assembled==0 */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: build B's compressed column map and the scatter */
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  /* cached MatGetRow() workspace is stale after assembly */
  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }
#if defined(PETSC_HAVE_SUPERLUDIST)
  ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); }
#endif

#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseSpooles_MPIAIJ(mat);CHKERRQ(ierr); }
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
/* Zeroes all entries by zeroing both sequential blocks. */
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
/*
   Zeroes the global rows listed in 'is' (optionally setting the diagonal to
   *diag).  Rows may be specified on any processor; they are first routed to
   their owners with point-to-point messages, then zeroed locally.
*/
int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
  int        i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int        *nprocs,j,idx,nsends,row;
  int        nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int        *rvalues,tag = A->tag,count,base,slen,n,*source;
  int        *lens,imdex,*lrows,*values,rstart=l->rstart;
  MPI_Comm   comm =
A->comm;
  MPI_Request *send_waits,*recv_waits;
  MPI_Status  recv_status,*send_status;
  IS          istmp;
  PetscTruth  found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor;
     nprocs[2*j] = number of rows going to processor j,
     nprocs[2*j+1] = 1 iff at least one row goes to processor j */
  ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: each incoming message holds at most nmax row indices */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  /* rebuild starts (the packing loop above advanced it) and fire the sends */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter: convert global rows to local */
  ierr  = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use seperate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

     Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    /* rectangular diagonal block: fall back to per-row MatSetValues(),
       which may create new nonzeros and therefore requires nonew == 0 */
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends so svalues can be freed safely */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
/*
   yy = A*xx: overlaps the scatter of ghost values into a->lvec with the
   local diagonal-block product, then adds the off-diagonal contribution.
*/
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr =
VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
/* zz = A*xx + yy, overlapping the ghost-value scatter with the local product */
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
/* yy = A^T*xx: B^T contribution is computed locally then scattered (reverse,
   add) to the owning processors while the local A^T product proceeds */
int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
/* zz = A^T*xx + yy, same communication pattern as MatMultTranspose_MPIAIJ */
int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
/* scales both the diagonal and off-diagonal blocks by the scalar *aa */
int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define
__FUNCT__ "MatDestroy_MPIAIJ"
/* Destroys the parallel matrix: stash, both sequential blocks, the column
   map, the ghost vector and its scatter context. */
int MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer);
extern int MatFactorInfo_Spooles(Mat,PetscViewer);

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
/*
   Writes the matrix to a binary viewer.  Processor 0 writes the header and
   its own data, then receives and writes each other processor's row
   lengths, column indices and values in turn.  Within each row the A
   (diagonal-block) entries are interleaved between the B entries with
   global column < cstart and those with global column >= cend, so the
   stored row is in increasing global-column order.
*/
int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ  *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ  *B = (Mat_SeqAIJ*)aij->B->data;
  int         nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag;
  int         nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
  PetscScalar *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (rank == 0) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->M;
    header[2] = mat->N;
    /* header[3] = global number of nonzeros */
    ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen = mat->m;
    ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* 0th processor needs a buffer as large as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->m; i++) {
    /* B entries left of the diagonal block */
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    /* diagonal-block entries, shifted to global numbering */
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    /* remaining B entries right of the diagonal block */
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values, in the same interleaved order */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ        *C = (Mat_SeqAIJ*)aij->A->data;
  int               ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,isascii,flg,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
      if (flg) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
                                                  rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
                                                  rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr =
MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 873 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 874 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 875 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 876 PetscFunctionReturn(0); 877 } else if (format == PETSC_VIEWER_ASCII_INFO) { 878 PetscFunctionReturn(0); 879 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 880 #if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE) 881 ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr); 882 #endif 883 #if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE) 884 ierr = MatFactorInfo_Spooles(mat,viewer);CHKERRQ(ierr); 885 #endif 886 887 PetscFunctionReturn(0); 888 } 889 } else if (isbinary) { 890 if (size == 1) { 891 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 892 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 893 } else { 894 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 895 } 896 PetscFunctionReturn(0); 897 } else if (isdraw) { 898 PetscDraw draw; 899 PetscTruth isnull; 900 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 901 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 902 } 903 904 if (size == 1) { 905 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 906 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 907 } else { 908 /* assemble the entire matrix onto first processor. 
*/ 909 Mat A; 910 Mat_SeqAIJ *Aloc; 911 int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 912 PetscScalar *a; 913 914 if (!rank) { 915 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 916 } else { 917 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 918 } 919 PetscLogObjectParent(mat,A); 920 921 /* copy over the A part */ 922 Aloc = (Mat_SeqAIJ*)aij->A->data; 923 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 924 row = aij->rstart; 925 for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;} 926 for (i=0; i<m; i++) { 927 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 928 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 929 } 930 aj = Aloc->j; 931 for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;} 932 933 /* copy over the B part */ 934 Aloc = (Mat_SeqAIJ*)aij->B->data; 935 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 936 row = aij->rstart; 937 ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr); 938 ct = cols; 939 for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];} 940 for (i=0; i<m; i++) { 941 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 942 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 943 } 944 ierr = PetscFree(ct);CHKERRQ(ierr); 945 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 946 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 947 /* 948 Everyone has to call to draw the matrix since the graphics waits are 949 synchronized across all processors that share the PetscDraw object 950 */ 951 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 952 if (!rank) { 953 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 954 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 955 } 956 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 957 ierr = 
MatDestroy(A);CHKERRQ(ierr); 958 } 959 PetscFunctionReturn(0); 960 } 961 962 #undef __FUNCT__ 963 #define __FUNCT__ "MatView_MPIAIJ" 964 int MatView_MPIAIJ(Mat mat,PetscViewer viewer) 965 { 966 int ierr; 967 PetscTruth isascii,isdraw,issocket,isbinary; 968 969 PetscFunctionBegin; 970 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 971 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 972 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 973 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 974 if (isascii || isdraw || isbinary || issocket) { 975 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 976 } else { 977 SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 978 } 979 PetscFunctionReturn(0); 980 } 981 982 983 984 #undef __FUNCT__ 985 #define __FUNCT__ "MatRelax_MPIAIJ" 986 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 987 { 988 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 989 int ierr; 990 Vec bb1; 991 PetscScalar mone=-1.0; 992 993 PetscFunctionBegin; 994 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 995 996 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 997 998 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 999 if (flag & SOR_ZERO_INITIAL_GUESS) { 1000 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1001 its--; 1002 } 1003 1004 while (its--) { 1005 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1006 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1007 1008 /* update rhs: bb1 = bb - B*x */ 1009 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1010 ierr 
= (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1011 1012 /* local sweep */ 1013 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1014 CHKERRQ(ierr); 1015 } 1016 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1017 if (flag & SOR_ZERO_INITIAL_GUESS) { 1018 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1019 its--; 1020 } 1021 while (its--) { 1022 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1023 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1024 1025 /* update rhs: bb1 = bb - B*x */ 1026 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1027 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1028 1029 /* local sweep */ 1030 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1031 CHKERRQ(ierr); 1032 } 1033 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1034 if (flag & SOR_ZERO_INITIAL_GUESS) { 1035 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1036 its--; 1037 } 1038 while (its--) { 1039 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1040 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1041 1042 /* update rhs: bb1 = bb - B*x */ 1043 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1044 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1045 1046 /* local sweep */ 1047 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1048 CHKERRQ(ierr); 1049 } 1050 } else { 1051 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1052 } 1053 1054 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1055 PetscFunctionReturn(0); 1056 } 1057 1058 #undef __FUNCT__ 1059 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1060 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType 
flag,MatInfo *info) 1061 { 1062 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1063 Mat A = mat->A,B = mat->B; 1064 int ierr; 1065 PetscReal isend[5],irecv[5]; 1066 1067 PetscFunctionBegin; 1068 info->block_size = 1.0; 1069 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1070 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1071 isend[3] = info->memory; isend[4] = info->mallocs; 1072 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1073 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1074 isend[3] += info->memory; isend[4] += info->mallocs; 1075 if (flag == MAT_LOCAL) { 1076 info->nz_used = isend[0]; 1077 info->nz_allocated = isend[1]; 1078 info->nz_unneeded = isend[2]; 1079 info->memory = isend[3]; 1080 info->mallocs = isend[4]; 1081 } else if (flag == MAT_GLOBAL_MAX) { 1082 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1083 info->nz_used = irecv[0]; 1084 info->nz_allocated = irecv[1]; 1085 info->nz_unneeded = irecv[2]; 1086 info->memory = irecv[3]; 1087 info->mallocs = irecv[4]; 1088 } else if (flag == MAT_GLOBAL_SUM) { 1089 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1090 info->nz_used = irecv[0]; 1091 info->nz_allocated = irecv[1]; 1092 info->nz_unneeded = irecv[2]; 1093 info->memory = irecv[3]; 1094 info->mallocs = irecv[4]; 1095 } 1096 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1097 info->fill_ratio_needed = 0; 1098 info->factor_mallocs = 0; 1099 info->rows_global = (double)matin->M; 1100 info->columns_global = (double)matin->N; 1101 info->rows_local = (double)matin->m; 1102 info->columns_local = (double)matin->N; 1103 1104 PetscFunctionReturn(0); 1105 } 1106 1107 #undef __FUNCT__ 1108 #define __FUNCT__ "MatSetOption_MPIAIJ" 1109 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1110 { 1111 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1112 int ierr; 1113 1114 PetscFunctionBegin; 1115 switch (op) { 1116 
case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    /* options handled entirely by the two sequential blocks */
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
/*
   Returns one locally owned row with global column indices, merging the
   diagonal (A) and off-diagonal (B) blocks into a single array sorted by
   increasing column number.  Uses per-matrix scratch buffers, so only one
   row may be "gotten" at a time; must be paired with MatRestoreRow.
*/
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int         i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  int         nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  int         *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    int        max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* one allocation holds both values and (after the values) the indices */
    ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max);
  }

  /* NOTE(review): SETERRQ expands to a block, so no semicolon follows here */
  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
  lrow = row - rstart;

  /* only request the pieces the caller actually asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries whose global column lies before the diagonal block */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already found while copying the values */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
/* Releases the row obtained by MatGetRow_MPIAIJ (scratch buffers are reused). */
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
/*
   Computes the Frobenius, 1-, or infinity-norm of the parallel matrix by
   combining local contributions from the A and B blocks with an Allreduce.
   The 2-norm is not supported.
*/
int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ  *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  int         ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
  PetscReal   sum = 0.0;
  PetscScalar *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum of |a_ij|^2 over both blocks, reduced, then square root */
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      /* note: each process accumulates into a length-N (global) array, so this
         path is not memory-scalable in the number of columns */
      PetscReal *tmp,*tmp2;
      int       *jj,*garray = aij->garray;
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++;
      }
      v
= bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        /* off-diagonal entries use garray to recover global column numbers */
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->m; j++) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
/*
   Forms the transpose by inserting each local row as a column of a new
   matrix with swapped sizes, then assembling (entries migrate to their
   owning processes).  In-place (matout == PETSC_NULL) requires M == N.
*/
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ  *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ  *Aloc = (Mat_SeqAIJ*)a->A->data;
  int         ierr,shift = Aloc->indexshift;
  int         M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat         B;
  PetscScalar *array;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  }

  /* local and global sizes are swapped in the result */
  ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part: temporarily shift local column indices to global,
     insert each row as a column of B, then shift them back */
  Aloc = (Mat_SeqAIJ*)a->A->data;
  m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}

  /* copy over the B part: map compressed indices through garray to global */
  Aloc = (Mat_SeqAIJ*)a->B->data;
  m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr);
  ct = cols;  /* ct keeps the base pointer so the walked cols can be freed */
  for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  ierr = PetscFree(ct);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    /* in-place: steal B's innards into A's header */
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
/*
   Computes mat = diag(ll)*mat*diag(rr).  The right scaling of the
   off-diagonal block needs ghost values of rr, so its scatter is started
   first and completed after the local work.
*/
int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat        a = aij->A,b = aij->B;
  int        ierr,s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation.
*/
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* left scaling uses only locally owned entries of ll */
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatPrintHelp_MPIAIJ"
/* Prints runtime-option help once (on process 0) via the SeqAIJ helper. */
int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPIAIJ"
/* AIJ is a point (non-blocked) format, so the block size is always 1. */
int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
/* Clears the factored state of the (only factorable) diagonal block. */
int MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
/*
   Compares two MPIAIJ matrices block-wise; the per-process results are
   combined with a logical-AND reduction so all ranks return the same flag.
*/
int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat        a,b,c,d;
  PetscTruth flg;
  int        ierr;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  /* only compare the off-diagonal blocks when the diagonal ones matched */
  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIAIJ"
/* Copies A into B; fast block-wise path only for an identical nonzero pattern. */
int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
  PetscTruth flg;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
  if (str != SAME_NONZERO_PATTERN || !flg) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
/* Default preallocation used when the caller never preallocated explicitly. */
int MatSetUpPreallocation_MPIAIJ(Mat A)
{
  int ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
#if !defined(PETSC_USE_COMPLEX) && \
!defined(PETSC_USE_SINGLE) 1510 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatFactorInfo*,Mat*); 1511 #endif 1512 1513 #include "petscblaslapack.h" 1514 1515 #undef __FUNCT__ 1516 #define __FUNCT__ "MatAXPY_MPIAIJ" 1517 int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str) 1518 { 1519 int ierr,one; 1520 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1521 Mat_SeqAIJ *x,*y; 1522 1523 PetscFunctionBegin; 1524 if (str == SAME_NONZERO_PATTERN) { 1525 x = (Mat_SeqAIJ *)xx->A->data; 1526 y = (Mat_SeqAIJ *)yy->A->data; 1527 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1528 x = (Mat_SeqAIJ *)xx->B->data; 1529 y = (Mat_SeqAIJ *)yy->B->data; 1530 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1531 } else { 1532 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1533 } 1534 PetscFunctionReturn(0); 1535 } 1536 1537 /* -------------------------------------------------------------------*/ 1538 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1539 MatGetRow_MPIAIJ, 1540 MatRestoreRow_MPIAIJ, 1541 MatMult_MPIAIJ, 1542 MatMultAdd_MPIAIJ, 1543 MatMultTranspose_MPIAIJ, 1544 MatMultTransposeAdd_MPIAIJ, 1545 0, 1546 0, 1547 0, 1548 0, 1549 0, 1550 0, 1551 MatRelax_MPIAIJ, 1552 MatTranspose_MPIAIJ, 1553 MatGetInfo_MPIAIJ, 1554 MatEqual_MPIAIJ, 1555 MatGetDiagonal_MPIAIJ, 1556 MatDiagonalScale_MPIAIJ, 1557 MatNorm_MPIAIJ, 1558 MatAssemblyBegin_MPIAIJ, 1559 MatAssemblyEnd_MPIAIJ, 1560 0, 1561 MatSetOption_MPIAIJ, 1562 MatZeroEntries_MPIAIJ, 1563 MatZeroRows_MPIAIJ, 1564 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1565 MatLUFactorSymbolic_MPIAIJ_TFS, 1566 #else 1567 0, 1568 #endif 1569 0, 1570 0, 1571 0, 1572 MatSetUpPreallocation_MPIAIJ, 1573 0, 1574 0, 1575 0, 1576 0, 1577 MatDuplicate_MPIAIJ, 1578 0, 1579 0, 1580 0, 1581 0, 1582 MatAXPY_MPIAIJ, 1583 MatGetSubMatrices_MPIAIJ, 1584 MatIncreaseOverlap_MPIAIJ, 1585 MatGetValues_MPIAIJ, 1586 MatCopy_MPIAIJ, 1587 MatPrintHelp_MPIAIJ, 1588 MatScale_MPIAIJ, 1589 0, 1590 0, 1591 0, 1592 
MatGetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
       MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       0,
       0,
       MatGetSubMatrix_MPIAIJ,
       MatDestroy_MPIAIJ,
       MatView_MPIAIJ,
       MatGetPetscMaps_Petsc,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatSetColoring_MPIAIJ,
       MatSetValuesAdic_MPIAIJ,
       MatSetValuesAdifor_MPIAIJ
};

/* ----------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIAIJ"
/* Stashes the current numerical values of both blocks (see MatStoreValues). */
int MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIAIJ"
/* Restores the values previously stashed by MatStoreValues_MPIAIJ. */
int MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#include "petscpc.h"
EXTERN_C_BEGIN
EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
/*
   Type constructor registered for MATMPIAIJ: installs the function table,
   splits row/column ownership across the communicator, and initializes the
   bookkeeping (ownership tables, stash, scatter placeholders).  The actual
   A/B blocks are created later by the preallocation routine.
*/
int MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  int        ierr,i,size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);

  ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
  ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->factor = 0;
  B->assembled = PETSC_FALSE;
  B->mapping = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size = size;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);

  /* fill in any PETSC_DECIDE local/global sizes */
  ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  /* gather per-process sizes then prefix-sum to get ownership ranges */
  ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend = b->rowners[b->rank+1];
  ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend = b->cowners[b->rank+1];

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash = PETSC_FALSE;
  b->colmap = 0;
  b->garray = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices = 0;
  b->rowvalues = 0;
  b->getrowactive = PETSC_FALSE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIAIJ",
                                     MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIAIJ",
                                     MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIAIJ",
                                     MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
/*
   Duplicates an (assembled) MPIAIJ matrix, copying the ownership layout,
   column map, garray, scatter context, and both sequential blocks
   (values copied or not per cpvalues).
*/
int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  int        ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
  a = (Mat_MPIAIJ*)mat->data;
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor = matin->factor;
  mat->assembled = PETSC_TRUE;
  mat->insertmode = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->rstart = oldmat->rstart;
  a->rend = oldmat->rend;
  a->cstart = oldmat->cstart;
  a->cend = oldmat->cend;
  a->size = oldmat->size;
  a->rank = oldmat->rank;
  a->donotstash = oldmat->donotstash;
  a->roworiented = oldmat->roworiented;
  a->rowindices = 0;
  a->rowvalues = 0;
  a->getrowactive = PETSC_FALSE;

  /* rowners and cowners live in the same 2*(size+2) allocation */
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
    ierr =
PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1776 #endif 1777 } else a->colmap = 0; 1778 if (oldmat->garray) { 1779 int len; 1780 len = oldmat->B->n; 1781 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1782 PetscLogObjectMemory(mat,len*sizeof(int)); 1783 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1784 } else a->garray = 0; 1785 1786 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1787 PetscLogObjectParent(mat,a->lvec); 1788 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1789 PetscLogObjectParent(mat,a->Mvctx); 1790 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1791 PetscLogObjectParent(mat,a->A); 1792 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1793 PetscLogObjectParent(mat,a->B); 1794 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1795 *newmat = mat; 1796 PetscFunctionReturn(0); 1797 } 1798 1799 #include "petscsys.h" 1800 1801 EXTERN_C_BEGIN 1802 #undef __FUNCT__ 1803 #define __FUNCT__ "MatLoad_MPIAIJ" 1804 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat) 1805 { 1806 Mat A; 1807 PetscScalar *vals,*svals; 1808 MPI_Comm comm = ((PetscObject)viewer)->comm; 1809 MPI_Status status; 1810 int i,nz,ierr,j,rstart,rend,fd; 1811 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1812 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1813 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1814 #if defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_SUPERLUDIST) 1815 PetscTruth flag; 1816 #endif 1817 1818 PetscFunctionBegin; 1819 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1820 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1821 if (!rank) { 1822 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1823 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1824 if (header[0] != MAT_FILE_COOKIE) 
SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
    }
  }

  /* broadcast the sizes (header[1]=M, header[2]=N) read on process 0 */
  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows: roughly M/size each, remainder to low ranks */
  m = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  /* turn the per-rank counts into a prefix-sum ownership table */
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors; offlens shares the allocation */
  ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    /* rowners doubles as the displacement array since it is a prefix sum */
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);

    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    /* square matrix: column ownership mirrors row ownership */
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix: temporarily shrink ourlens to the diagonal-only
     counts for preallocation, then restore the totals for insertion */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix; smycols/svals walk the packed row data */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
#if defined(PETSC_HAVE_SPOOLES)
  /* optionally switch the solver to Spooles when requested on the options DB */
  ierr = PetscOptionsHasName(A->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr);
  if (flag) {
    if (size == 1) {
      ierr = MatUseSpooles_SeqAIJ(A);CHKERRQ(ierr);
    } else {
      ierr = MatUseSpooles_MPIAIJ(A);CHKERRQ(ierr);
    }
  }
#endif
#if defined(PETSC_HAVE_SUPERLUDIST)
  ierr = PetscOptionsHasName(A->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(A);CHKERRQ(ierr); }
#endif
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
{
  int         ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  int         *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat         *local,M,Mreuse;
  PetscScalar *vwork,*aa;
  MPI_Comm    comm = mat->comm;
  Mat_SeqAIJ  *aij;


  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    /* recover the sequential scratch matrix stashed on the previous call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    if
(aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
    ii = aij->i;
    jj = aij->j;

    /*
     Determine the number of non-zeros in the diagonal and off-diagonal
     portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        /* rectangular: split the columns roughly evenly across ranks */
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum the local column counts to get this rank's column range;
       note rstart/rend are reused here for COLUMN ownership */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ(1,"Local column sizes do not add up to total number of columns");
    }

    /* next, compute all the lengths; olens shares the dlens allocation */
    ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    int ml,nl;

    /* reuse path: the caller's matrix must match the requested layout */
    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
     The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
  ii = aij->i;
  jj = aij->j;
  aa = aij->a;
  /* copy the sequential scratch matrix row by row into the parallel result */
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request; composing transfers
     the reference, so drop ours afterwards */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2145 The size of this array is equal to the number of local rows, i.e 'm'. 2146 You must leave room for the diagonal entry even if it is zero. 2147 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2148 submatrix (same value is used for all local rows). 2149 - o_nnz - array containing the number of nonzeros in the various rows of the 2150 OFF-DIAGONAL portion of the local submatrix (possibly different for 2151 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2152 structure. The size of this array is equal to the number 2153 of local rows, i.e 'm'. 2154 2155 The AIJ format (also called the Yale sparse matrix format or 2156 compressed row storage), is fully compatible with standard Fortran 77 2157 storage. That is, the stored row and column indices can begin at 2158 either one (as in Fortran) or zero. See the users manual for details. 2159 2160 The user MUST specify either the local or global matrix dimensions 2161 (possibly both). 2162 2163 The parallel matrix is partitioned such that the first m0 rows belong to 2164 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2165 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2166 2167 The DIAGONAL portion of the local submatrix of a processor can be defined 2168 as the submatrix which is obtained by extraction the part corresponding 2169 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2170 first row that belongs to the processor, and r2 is the last row belonging 2171 to the this processor. This is a square mxm matrix. The remaining portion 2172 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2173 2174 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2175 2176 By default, this format uses inodes (identical nodes) when possible. 2177 We search for consecutive rows with the same nonzero structure, thereby 2178 reusing matrix information to achieve increased efficiency. 
2179 2180 Options Database Keys: 2181 + -mat_aij_no_inode - Do not use inodes 2182 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2183 - -mat_aij_oneindex - Internally use indexing starting at 1 2184 rather than 0. Note that when calling MatSetValues(), 2185 the user still MUST index entries starting at 0! 2186 2187 Example usage: 2188 2189 Consider the following 8x8 matrix with 34 non-zero values, that is 2190 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2191 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2192 as follows: 2193 2194 .vb 2195 1 2 0 | 0 3 0 | 0 4 2196 Proc0 0 5 6 | 7 0 0 | 8 0 2197 9 0 10 | 11 0 0 | 12 0 2198 ------------------------------------- 2199 13 0 14 | 15 16 17 | 0 0 2200 Proc1 0 18 0 | 19 20 21 | 0 0 2201 0 0 0 | 22 23 0 | 24 0 2202 ------------------------------------- 2203 Proc2 25 26 27 | 0 0 28 | 29 0 2204 30 0 0 | 31 32 33 | 0 34 2205 .ve 2206 2207 This can be represented as a collection of submatrices as: 2208 2209 .vb 2210 A B C 2211 D E F 2212 G H I 2213 .ve 2214 2215 Where the submatrices A,B,C are owned by proc0, D,E,F are 2216 owned by proc1, G,H,I are owned by proc2. 2217 2218 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2219 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2220 The 'M','N' parameters are 8,8, and have the same values on all procs. 2221 2222 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2223 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2224 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2225 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2226 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2227 matrix, ans [DF] as another SeqAIJ matrix. 

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e. 34, and
   hence pre-allocation is perfect.
2255 2256 Level: intermediate 2257 2258 .keywords: matrix, aij, compressed row, sparse, parallel 2259 2260 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2261 @*/ 2262 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 2263 { 2264 Mat_MPIAIJ *b; 2265 int ierr,i; 2266 PetscTruth flg2; 2267 2268 PetscFunctionBegin; 2269 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr); 2270 if (!flg2) PetscFunctionReturn(0); 2271 B->preallocated = PETSC_TRUE; 2272 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2273 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2274 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 2275 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 2276 if (d_nnz) { 2277 for (i=0; i<B->m; i++) { 2278 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 2279 } 2280 } 2281 if (o_nnz) { 2282 for (i=0; i<B->m; i++) { 2283 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 2284 } 2285 } 2286 b = (Mat_MPIAIJ*)B->data; 2287 2288 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2289 PetscLogObjectParent(B,b->A); 2290 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2291 PetscLogObjectParent(B,b->B); 2292 2293 PetscFunctionReturn(0); 2294 } 2295 2296 #undef __FUNCT__ 2297 #define __FUNCT__ "MatCreateMPIAIJ" 2298 /*@C 2299 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2300 (the default parallel PETSc format). For good matrix assembly performance 2301 the user should preallocate the matrix storage by setting the parameters 2302 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2303 performance can be increased by more than a factor of 50. 
2304 2305 Collective on MPI_Comm 2306 2307 Input Parameters: 2308 + comm - MPI communicator 2309 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2310 This value should be the same as the local size used in creating the 2311 y vector for the matrix-vector product y = Ax. 2312 . n - This value should be the same as the local size used in creating the 2313 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2314 calculated if N is given) For square matrices n is almost always m. 2315 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2316 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2317 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2318 (same value is used for all local rows) 2319 . d_nnz - array containing the number of nonzeros in the various rows of the 2320 DIAGONAL portion of the local submatrix (possibly different for each row) 2321 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2322 The size of this array is equal to the number of local rows, i.e 'm'. 2323 You must leave room for the diagonal entry even if it is zero. 2324 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2325 submatrix (same value is used for all local rows). 2326 - o_nnz - array containing the number of nonzeros in the various rows of the 2327 OFF-DIAGONAL portion of the local submatrix (possibly different for 2328 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2329 structure. The size of this array is equal to the number 2330 of local rows, i.e 'm'. 2331 2332 Output Parameter: 2333 . A - the matrix 2334 2335 Notes: 2336 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2337 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2338 storage requirements for this matrix. 
2339 2340 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2341 processor than it must be used on all processors that share the object for 2342 that argument. 2343 2344 The AIJ format (also called the Yale sparse matrix format or 2345 compressed row storage), is fully compatible with standard Fortran 77 2346 storage. That is, the stored row and column indices can begin at 2347 either one (as in Fortran) or zero. See the users manual for details. 2348 2349 The user MUST specify either the local or global matrix dimensions 2350 (possibly both). 2351 2352 The parallel matrix is partitioned such that the first m0 rows belong to 2353 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2354 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2355 2356 The DIAGONAL portion of the local submatrix of a processor can be defined 2357 as the submatrix which is obtained by extraction the part corresponding 2358 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2359 first row that belongs to the processor, and r2 is the last row belonging 2360 to the this processor. This is a square mxm matrix. The remaining portion 2361 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2362 2363 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2364 2365 By default, this format uses inodes (identical nodes) when possible. 2366 We search for consecutive rows with the same nonzero structure, thereby 2367 reusing matrix information to achieve increased efficiency. 2368 2369 Options Database Keys: 2370 + -mat_aij_no_inode - Do not use inodes 2371 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2372 - -mat_aij_oneindex - Internally use indexing starting at 1 2373 rather than 0. Note that when calling MatSetValues(), 2374 the user still MUST index entries starting at 0! 
2375 2376 2377 Example usage: 2378 2379 Consider the following 8x8 matrix with 34 non-zero values, that is 2380 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2381 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2382 as follows: 2383 2384 .vb 2385 1 2 0 | 0 3 0 | 0 4 2386 Proc0 0 5 6 | 7 0 0 | 8 0 2387 9 0 10 | 11 0 0 | 12 0 2388 ------------------------------------- 2389 13 0 14 | 15 16 17 | 0 0 2390 Proc1 0 18 0 | 19 20 21 | 0 0 2391 0 0 0 | 22 23 0 | 24 0 2392 ------------------------------------- 2393 Proc2 25 26 27 | 0 0 28 | 29 0 2394 30 0 0 | 31 32 33 | 0 34 2395 .ve 2396 2397 This can be represented as a collection of submatrices as: 2398 2399 .vb 2400 A B C 2401 D E F 2402 G H I 2403 .ve 2404 2405 Where the submatrices A,B,C are owned by proc0, D,E,F are 2406 owned by proc1, G,H,I are owned by proc2. 2407 2408 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2409 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2410 The 'M','N' parameters are 8,8, and have the same values on all procs. 2411 2412 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2413 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2414 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2415 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2416 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2417 matrix, ans [DF] as another SeqAIJ matrix. 2418 2419 When d_nz, o_nz parameters are specified, d_nz storage elements are 2420 allocated for every row of the local diagonal submatrix, and o_nz 2421 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2422 One way to choose d_nz and o_nz is to use the max nonzerors per local 2423 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 
2424 In this case, the values of d_nz,o_nz are: 2425 .vb 2426 proc0 : dnz = 2, o_nz = 2 2427 proc1 : dnz = 3, o_nz = 2 2428 proc2 : dnz = 1, o_nz = 4 2429 .ve 2430 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2431 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2432 for proc3. i.e we are using 12+15+10=37 storage locations to store 2433 34 values. 2434 2435 When d_nnz, o_nnz parameters are specified, the storage is specified 2436 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2437 In the above case the values for d_nnz,o_nnz are: 2438 .vb 2439 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2440 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2441 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2442 .ve 2443 Here the space allocated is sum of all the above values i.e 34, and 2444 hence pre-allocation is perfect. 2445 2446 Level: intermediate 2447 2448 .keywords: matrix, aij, compressed row, sparse, parallel 2449 2450 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2451 @*/ 2452 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 2453 { 2454 int ierr,size; 2455 2456 PetscFunctionBegin; 2457 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2458 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2459 if (size > 1) { 2460 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2461 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2462 } else { 2463 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2464 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2465 } 2466 PetscFunctionReturn(0); 2467 } 2468 2469 #undef __FUNCT__ 2470 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2471 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap) 2472 { 2473 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2474 PetscFunctionBegin; 2475 *Ad = a->A; 2476 *Ao = a->B; 2477 *colmap = a->garray; 2478 PetscFunctionReturn(0); 2479 } 2480 2481 #undef __FUNCT__ 2482 
#define __FUNCT__ "MatSetColoring_MPIAIJ" 2483 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2484 { 2485 int ierr,i; 2486 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2487 2488 PetscFunctionBegin; 2489 if (coloring->ctype == IS_COLORING_LOCAL) { 2490 ISColoringValue *allcolors,*colors; 2491 ISColoring ocoloring; 2492 2493 /* set coloring for diagonal portion */ 2494 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2495 2496 /* set coloring for off-diagonal portion */ 2497 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2498 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2499 for (i=0; i<a->B->n; i++) { 2500 colors[i] = allcolors[a->garray[i]]; 2501 } 2502 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2503 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2504 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2505 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2506 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2507 ISColoringValue *colors; 2508 int *larray; 2509 ISColoring ocoloring; 2510 2511 /* set coloring for diagonal portion */ 2512 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2513 for (i=0; i<a->A->n; i++) { 2514 larray[i] = i + a->cstart; 2515 } 2516 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2517 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2518 for (i=0; i<a->A->n; i++) { 2519 colors[i] = coloring->colors[larray[i]]; 2520 } 2521 ierr = PetscFree(larray);CHKERRQ(ierr); 2522 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2523 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2524 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2525 2526 /* set coloring for off-diagonal portion */ 2527 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 
2528 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2529 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2530 for (i=0; i<a->B->n; i++) { 2531 colors[i] = coloring->colors[larray[i]]; 2532 } 2533 ierr = PetscFree(larray);CHKERRQ(ierr); 2534 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2535 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2536 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2537 } else { 2538 SETERRQ1(1,"No support ISColoringType %d",coloring->ctype); 2539 } 2540 2541 PetscFunctionReturn(0); 2542 } 2543 2544 #undef __FUNCT__ 2545 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2546 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2547 { 2548 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2549 int ierr; 2550 2551 PetscFunctionBegin; 2552 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2553 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2554 PetscFunctionReturn(0); 2555 } 2556 2557 #undef __FUNCT__ 2558 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2559 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues) 2560 { 2561 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2562 int ierr; 2563 2564 PetscFunctionBegin; 2565 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2566 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2567 PetscFunctionReturn(0); 2568 } 2569 2570 #undef __FUNCT__ 2571 #define __FUNCT__ "MatMerge" 2572 /*@C 2573 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2574 matrices from each processor 2575 2576 Collective on MPI_Comm 2577 2578 Input Parameters: 2579 + comm - the communicators the parallel matrix will live on 2580 - inmat - the input sequential matrices 2581 2582 Output Parameter: 2583 . 
outmat - the parallel matrix generated 2584 2585 Level: advanced 2586 2587 Notes: The number of columns of the matrix in EACH of the seperate files 2588 MUST be the same. 2589 2590 @*/ 2591 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat) 2592 { 2593 int ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz; 2594 PetscScalar *values; 2595 PetscMap columnmap,rowmap; 2596 2597 PetscFunctionBegin; 2598 2599 ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr); 2600 2601 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2602 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2603 ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr); 2604 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2605 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2606 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2607 2608 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2609 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2610 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2611 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2612 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2613 2614 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2615 for (i=0;i<m;i++) { 2616 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2617 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2618 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2619 } 2620 ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr); 2621 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2622 2623 for (i=0;i<m;i++) { 2624 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2625 I = i + rstart; 2626 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2627 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2628 } 2629 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2630 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2631 
ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2632 2633 PetscFunctionReturn(0); 2634 } 2635 2636 #undef __FUNCT__ 2637 #define __FUNCT__ "MatFileSplit" 2638 int MatFileSplit(Mat A,char *outfile) 2639 { 2640 int ierr,rank,len,m,N,i,rstart,*indx,nnz; 2641 PetscViewer out; 2642 char *name; 2643 Mat B; 2644 PetscScalar *values; 2645 2646 PetscFunctionBegin; 2647 2648 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2649 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2650 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr); 2651 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2652 for (i=0;i<m;i++) { 2653 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2654 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2655 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2656 } 2657 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2658 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2659 2660 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2661 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2662 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2663 sprintf(name,"%s.%d",outfile,rank); 2664 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr); 2665 ierr = PetscFree(name); 2666 ierr = MatView(B,out);CHKERRQ(ierr); 2667 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2668 ierr = MatDestroy(B);CHKERRQ(ierr); 2669 PetscFunctionReturn(0); 2670 } 2671