/*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
EXTERN int DisAssemble_MPIAIJ(Mat);
EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatPrintHelp_SeqAIJ(Mat);

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is defined this is scalable,
  at a slightly higher hash-table cost; without it, it is not scalable (each
  processor has an order-N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        n = aij->B->n,i,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int         new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if
(!a->singlemalloc) { \ 94 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 95 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 96 } \ 97 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 98 a->singlemalloc = PETSC_TRUE; \ 99 \ 100 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 101 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 102 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 103 a->maxnz += CHUNKSIZE; \ 104 a->reallocs++; \ 105 } \ 106 N = nrow++ - 1; a->nz++; \ 107 /* shift up all the later entries in this row */ \ 108 for (ii=N; ii>=_i; ii--) { \ 109 rp[ii+1] = rp[ii]; \ 110 ap[ii+1] = ap[ii]; \ 111 } \ 112 rp[_i] = col1; \ 113 ap[_i] = value; \ 114 a_noinsert: ; \ 115 ailen[row] = nrow; \ 116 } 117 118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 119 { \ 120 \ 121 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 122 rmax = bimax[row]; nrow = bilen[row]; \ 123 col1 = col - shift; \ 124 \ 125 low = 0; high = nrow; \ 126 while (high-low > 5) { \ 127 t = (low+high)/2; \ 128 if (rp[t] > col) high = t; \ 129 else low = t; \ 130 } \ 131 for (_i=low; _i<high; _i++) { \ 132 if (rp[_i] > col1) break; \ 133 if (rp[_i] == col1) { \ 134 if (addv == ADD_VALUES) ap[_i] += value; \ 135 else ap[_i] = value; \ 136 goto b_noinsert; \ 137 } \ 138 } \ 139 if (nonew == 1) goto b_noinsert; \ 140 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 141 if (nrow >= rmax) { \ 142 /* there is no extra room in row, therefore enlarge */ \ 143 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 144 PetscScalar *new_a; \ 145 \ 146 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 147 \ 148 /* malloc new storage space */ \ 149 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \ 150 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 151 new_j = (int*)(new_a + new_nz); \ 152 new_i = new_j + new_nz; \ 153 \ 154 /* copy over old data into new slots */ \ 155 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 156 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 157 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 158 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 159 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 160 len*sizeof(int));CHKERRQ(ierr); \ 161 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 162 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 163 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 164 /* free up old matrix storage */ \ 165 \ 166 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 167 if (!b->singlemalloc) { \ 168 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 169 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 170 } \ 171 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 172 b->singlemalloc = PETSC_TRUE; \ 173 \ 174 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 175 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 176 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 177 b->maxnz += CHUNKSIZE; \ 178 b->reallocs++; \ 179 } \ 180 N = nrow++ - 1; b->nz++; \ 181 /* shift up all the later entries in this row */ \ 182 for (ii=N; ii>=_i; ii--) { \ 183 rp[ii+1] = rp[ii]; \ 184 ap[ii+1] = ap[ii]; \ 185 } \ 186 rp[_i] = col1; \ 187 ap[_i] = value; \ 188 b_noinsert: ; \ 189 bilen[row] = nrow; \ 190 } 191 192 #undef __FUNCT__ 193 #define __FUNCT__ "MatSetValues_MPIAIJ" 194 int MatSetValues_MPIAIJ(Mat 
mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 195 { 196 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 197 PetscScalar value; 198 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 199 int cstart = aij->cstart,cend = aij->cend,row,col; 200 PetscTruth roworiented = aij->roworiented; 201 202 /* Some Variables required in the macro */ 203 Mat A = aij->A; 204 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 205 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 206 PetscScalar *aa = a->a; 207 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 208 Mat B = aij->B; 209 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 210 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 211 PetscScalar *ba = b->a; 212 213 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 214 int nonew = a->nonew,shift = a->indexshift; 215 PetscScalar *ap; 216 217 PetscFunctionBegin; 218 for (i=0; i<m; i++) { 219 if (im[i] < 0) continue; 220 #if defined(PETSC_USE_BOPT_g) 221 if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 222 #endif 223 if (im[i] >= rstart && im[i] < rend) { 224 row = im[i] - rstart; 225 for (j=0; j<n; j++) { 226 if (in[j] >= cstart && in[j] < cend){ 227 col = in[j] - cstart; 228 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 229 if (ignorezeroentries && value == 0.0) continue; 230 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 231 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 232 } else if (in[j] < 0) continue; 233 #if defined(PETSC_USE_BOPT_g) 234 else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");} 235 #endif 236 else { 237 if (mat->was_assembled) { 238 if (!aij->colmap) { 239 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 240 } 241 #if defined (PETSC_USE_CTABLE) 242 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 243 col--; 244 #else 245 col = aij->colmap[in[j]] - 1; 246 #endif 247 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 248 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 249 col = in[j]; 250 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 251 B = aij->B; 252 b = (Mat_SeqAIJ*)B->data; 253 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 254 ba = b->a; 255 } 256 } else col = in[j]; 257 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 258 if (ignorezeroentries && value == 0.0) continue; 259 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 260 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 261 } 262 } 263 } else { 264 if (!aij->donotstash) { 265 if (roworiented) { 266 if (ignorezeroentries && v[i*n] == 0.0) continue; 267 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 268 } else { 269 if (ignorezeroentries && v[i] == 0.0) continue; 270 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 271 } 272 } 273 } 274 } 275 PetscFunctionReturn(0); 276 } 277 278 #undef __FUNCT__ 279 #define __FUNCT__ "MatGetValues_MPIAIJ" 280 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v) 281 { 282 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 283 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 284 int cstart = aij->cstart,cend = aij->cend,row,col; 285 286 PetscFunctionBegin; 287 for (i=0; i<m; i++) { 288 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 289 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 
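      /* Rows owned by this process are served from the local diagonal (A) and
         off-diagonal (B) blocks; columns outside [cstart,cend) are translated to
         local B indices through the colmap. Off-process rows are rejected below. */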
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr,nstash,reallocs;
  InsertMode addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  int         i,j,rstart,ncols,n,ierr,flg;
  int         *row,*col,other_disassembled;
  PetscScalar *val;
  InsertMode  addv = mat->insertmode;
#if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES)
  PetscTruth  flag;
#endif

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble.
*/ 385 /* 386 if nonzero structure of submatrix B cannot change then we know that 387 no processor disassembled thus we can skip this stuff 388 */ 389 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 390 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 391 if (mat->was_assembled && !other_disassembled) { 392 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 393 } 394 } 395 396 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 397 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 398 } 399 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 400 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 401 402 if (aij->rowvalues) { 403 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 404 aij->rowvalues = 0; 405 } 406 #if defined(PETSC_HAVE_SUPERLUDIST) 407 ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr); 408 if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); } 409 #endif 410 411 #if defined(PETSC_HAVE_SPOOLES) 412 ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr); 413 if (flag) { ierr = MatUseSpooles_MPIAIJ(mat);CHKERRQ(ierr); } 414 #endif 415 PetscFunctionReturn(0); 416 } 417 418 #undef __FUNCT__ 419 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 420 int MatZeroEntries_MPIAIJ(Mat A) 421 { 422 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 423 int ierr; 424 425 PetscFunctionBegin; 426 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 427 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 428 PetscFunctionReturn(0); 429 } 430 431 #undef __FUNCT__ 432 #define __FUNCT__ "MatZeroRows_MPIAIJ" 433 int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag) 434 { 435 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 436 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 437 int *nprocs,j,idx,nsends,row; 438 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 439 int *rvalues,tag = A->tag,count,base,slen,n,*source; 440 int *lens,imdex,*lrows,*values,rstart=l->rstart; 441 MPI_Comm comm = A->comm; 442 MPI_Request *send_waits,*recv_waits; 443 MPI_Status recv_status,*send_status; 444 IS istmp; 445 PetscTruth found; 446 447 PetscFunctionBegin; 448 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 449 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 450 451 /* first count number of contributors to each processor */ 452 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 453 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 454 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 455 for (i=0; i<N; i++) { 456 idx = rows[i]; 457 found = PETSC_FALSE; 458 for (j=0; j<size; j++) { 459 if (idx >= owners[j] && idx < owners[j+1]) { 460 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 461 } 462 } 463 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 464 } 465 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 466 467 /* inform other processors of number of messages and max length*/ 468 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 469 470 /* post receives: */ 471 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 472 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 473 for (i=0; i<nrecvs; i++) { 474 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 475 } 476 477 /* do sends: 478 1) starts[i] gives the starting index in svalues for stuff going to 479 the ith processor 480 */ 481 ierr = 
PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory, causing many mallocs and slowing things down.
541 542 Contributed by: Mathew Knepley 543 */ 544 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 545 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 546 if (diag && (l->A->M == l->A->N)) { 547 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 548 } else if (diag) { 549 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 550 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 551 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 552 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 553 } 554 for (i = 0; i < slen; i++) { 555 row = lrows[i] + rstart; 556 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 557 } 558 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 559 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 560 } else { 561 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 562 } 563 ierr = ISDestroy(istmp);CHKERRQ(ierr); 564 ierr = PetscFree(lrows);CHKERRQ(ierr); 565 566 /* wait on sends */ 567 if (nsends) { 568 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 569 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 570 ierr = PetscFree(send_status);CHKERRQ(ierr); 571 } 572 ierr = PetscFree(send_waits);CHKERRQ(ierr); 573 ierr = PetscFree(svalues);CHKERRQ(ierr); 574 575 PetscFunctionReturn(0); 576 } 577 578 #undef __FUNCT__ 579 #define __FUNCT__ "MatMult_MPIAIJ" 580 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 581 { 582 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 583 int ierr,nt; 584 585 PetscFunctionBegin; 586 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 587 if (nt != A->n) { 588 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt); 589 } 590 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 591 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 592 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 593 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 594 PetscFunctionReturn(0); 595 } 596 597 #undef __FUNCT__ 598 #define __FUNCT__ "MatMultAdd_MPIAIJ" 599 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 600 { 601 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 602 int ierr; 603 604 PetscFunctionBegin; 605 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 606 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 607 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 608 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 609 PetscFunctionReturn(0); 610 } 611 612 #undef __FUNCT__ 613 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 614 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 615 { 616 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 617 int ierr; 618 619 PetscFunctionBegin; 620 /* do nondiagonal part */ 621 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 622 /* send it on its way */ 623 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 624 /* do local part */ 625 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 626 /* receive remote parts: note this assumes the values are not actually */ 627 /* inserted in yy until the next line, which is true for my implementation*/ 628 /* but is not perhaps always true. 
*/ 629 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 630 PetscFunctionReturn(0); 631 } 632 633 #undef __FUNCT__ 634 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 635 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 636 { 637 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 638 int ierr; 639 640 PetscFunctionBegin; 641 /* do nondiagonal part */ 642 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 643 /* send it on its way */ 644 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 645 /* do local part */ 646 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 647 /* receive remote parts: note this assumes the values are not actually */ 648 /* inserted in yy until the next line, which is true for my implementation*/ 649 /* but is not perhaps always true. */ 650 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 651 PetscFunctionReturn(0); 652 } 653 654 /* 655 This only works correctly for square matrices where the subblock A->A is the 656 diagonal block 657 */ 658 #undef __FUNCT__ 659 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 660 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 661 { 662 int ierr; 663 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 664 665 PetscFunctionBegin; 666 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 667 if (a->rstart != a->cstart || a->rend != a->cend) { 668 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 669 } 670 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 671 PetscFunctionReturn(0); 672 } 673 674 #undef __FUNCT__ 675 #define __FUNCT__ "MatScale_MPIAIJ" 676 int MatScale_MPIAIJ(PetscScalar *aa,Mat A) 677 { 678 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 679 int ierr; 680 681 PetscFunctionBegin; 682 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 683 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 684 PetscFunctionReturn(0); 685 } 686 687 #undef __FUNCT__ 688 #define __FUNCT__ "MatDestroy_MPIAIJ" 689 int MatDestroy_MPIAIJ(Mat mat) 690 { 691 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 692 int ierr; 693 694 PetscFunctionBegin; 695 #if defined(PETSC_USE_LOG) 696 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 697 #endif 698 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 699 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 700 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 701 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 702 #if defined (PETSC_USE_CTABLE) 703 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 704 #else 705 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 706 #endif 707 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 708 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 709 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 710 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 711 ierr = PetscFree(aij);CHKERRQ(ierr); 712 PetscFunctionReturn(0); 713 } 714 715 extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer); 716 extern int MatFactorInfo_Spooles(Mat,PetscViewer); 717 718 #undef __FUNCT__ 719 #define __FUNCT__ "MatView_MPIAIJ_Binary" 720 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 721 { 722 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 723 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 724 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 725 int nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag; 726 int 
nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
  PetscScalar *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (rank == 0) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->M;
    header[2] = mat->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen = mat->m;
    ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
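      /* diagonal-block values; these correspond to the global columns
         A->j[k] + cstart emitted in the column_indices pass above */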
column_values[cnt++] = A->a[k]; 816 } 817 for (; j<B->i[i+1]; j++) { 818 column_values[cnt++] = B->a[j]; 819 } 820 } 821 if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 822 823 /* store the column values to the file */ 824 if (rank == 0) { 825 MPI_Status status; 826 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr); 827 for (i=1; i<size; i++) { 828 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 829 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 830 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 831 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr); 832 } 833 } else { 834 ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 835 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 836 } 837 ierr = PetscFree(column_values);CHKERRQ(ierr); 838 PetscFunctionReturn(0); 839 } 840 841 #undef __FUNCT__ 842 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 843 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 844 { 845 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 846 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 847 int ierr,shift = C->indexshift,rank = aij->rank,size = aij->size; 848 PetscTruth isdraw,isascii,flg,isbinary; 849 PetscViewer sviewer; 850 PetscViewerFormat format; 851 852 PetscFunctionBegin; 853 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 854 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 855 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 856 if (isascii) { 857 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 858 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 859 MatInfo info; 860 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 861 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 862 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 863 if (flg) { 864 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 865 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 866 } else { 867 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 868 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 869 } 870 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 871 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 872 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 873 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 874 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 875 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 876 PetscFunctionReturn(0); 877 } else if (format == PETSC_VIEWER_ASCII_INFO) { 878 PetscFunctionReturn(0); 879 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 880 #if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE) 881 ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr); 882 #endif 883 #if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE) 884 ierr = MatFactorInfo_Spooles(mat,viewer);CHKERRQ(ierr); 885 #endif 886 887 PetscFunctionReturn(0); 888 } 889 } else 
if (isbinary) { 890 if (size == 1) { 891 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 892 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 893 } else { 894 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 895 } 896 PetscFunctionReturn(0); 897 } else if (isdraw) { 898 PetscDraw draw; 899 PetscTruth isnull; 900 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 901 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 902 } 903 904 if (size == 1) { 905 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 906 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 907 } else { 908 /* assemble the entire matrix onto first processor. */ 909 Mat A; 910 Mat_SeqAIJ *Aloc; 911 int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 912 PetscScalar *a; 913 914 if (!rank) { 915 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 916 } else { 917 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 918 } 919 PetscLogObjectParent(mat,A); 920 921 /* copy over the A part */ 922 Aloc = (Mat_SeqAIJ*)aij->A->data; 923 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 924 row = aij->rstart; 925 for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;} 926 for (i=0; i<m; i++) { 927 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 928 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 929 } 930 aj = Aloc->j; 931 for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;} 932 933 /* copy over the B part */ 934 Aloc = (Mat_SeqAIJ*)aij->B->data; 935 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 936 row = aij->rstart; 937 ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr); 938 ct = cols; 939 for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];} 940 for (i=0; i<m; i++) { 941 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 942 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 943 } 944 ierr = PetscFree(ct);CHKERRQ(ierr); 945 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 946 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 947 /* 948 Everyone has to call to draw the matrix since the graphics waits are 949 synchronized across all processors that share the PetscDraw object 950 */ 951 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 952 if (!rank) { 953 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 954 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 955 } 956 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 957 ierr = MatDestroy(A);CHKERRQ(ierr); 958 } 959 PetscFunctionReturn(0); 960 } 961 962 #undef __FUNCT__ 963 #define __FUNCT__ "MatView_MPIAIJ" 964 int MatView_MPIAIJ(Mat mat,PetscViewer viewer) 965 { 966 int ierr; 967 PetscTruth isascii,isdraw,issocket,isbinary; 968 969 PetscFunctionBegin; 970 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 971 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 972 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 973 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 974 if (isascii || isdraw || isbinary || issocket) { 975 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 976 } else { 977 SETERRQ1(1,"Viewer type %s not supported by MPIAIJ 
matrices",((PetscObject)viewer)->type_name); 978 } 979 PetscFunctionReturn(0); 980 } 981 982 983 984 #undef __FUNCT__ 985 #define __FUNCT__ "MatRelax_MPIAIJ" 986 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 987 { 988 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 989 int ierr; 990 Vec bb1; 991 PetscScalar mone=-1.0; 992 993 PetscFunctionBegin; 994 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 995 996 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 997 998 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 999 if (flag & SOR_ZERO_INITIAL_GUESS) { 1000 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1001 its--; 1002 } 1003 1004 while (its--) { 1005 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1006 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1007 1008 /* update rhs: bb1 = bb - B*x */ 1009 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1010 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1011 1012 /* local sweep */ 1013 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1014 CHKERRQ(ierr); 1015 } 1016 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1017 if (flag & SOR_ZERO_INITIAL_GUESS) { 1018 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1019 its--; 1020 } 1021 while (its--) { 1022 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1023 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1024 1025 /* update rhs: bb1 = bb - B*x */ 1026 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1027 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1028 1029 /* local sweep */ 1030 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1031 CHKERRQ(ierr); 1032 } 1033 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1034 if (flag & SOR_ZERO_INITIAL_GUESS) { 1035 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1036 its--; 1037 } 1038 while (its--) { 1039 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1040 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1041 1042 /* update rhs: bb1 = bb - B*x */ 1043 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1044 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1045 1046 /* local sweep */ 1047 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1048 CHKERRQ(ierr); 1049 } 1050 } else { 1051 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1052 } 1053 1054 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1055 PetscFunctionReturn(0); 1056 } 1057 1058 #undef __FUNCT__ 1059 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1060 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1061 { 1062 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1063 Mat A = mat->A,B = mat->B; 1064 int ierr; 1065 PetscReal isend[5],irecv[5]; 1066 1067 PetscFunctionBegin; 1068 info->block_size = 1.0; 1069 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1070 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1071 isend[3] = info->memory; isend[4] = 
info->mallocs; 1072 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1073 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1074 isend[3] += info->memory; isend[4] += info->mallocs; 1075 if (flag == MAT_LOCAL) { 1076 info->nz_used = isend[0]; 1077 info->nz_allocated = isend[1]; 1078 info->nz_unneeded = isend[2]; 1079 info->memory = isend[3]; 1080 info->mallocs = isend[4]; 1081 } else if (flag == MAT_GLOBAL_MAX) { 1082 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1083 info->nz_used = irecv[0]; 1084 info->nz_allocated = irecv[1]; 1085 info->nz_unneeded = irecv[2]; 1086 info->memory = irecv[3]; 1087 info->mallocs = irecv[4]; 1088 } else if (flag == MAT_GLOBAL_SUM) { 1089 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1090 info->nz_used = irecv[0]; 1091 info->nz_allocated = irecv[1]; 1092 info->nz_unneeded = irecv[2]; 1093 info->memory = irecv[3]; 1094 info->mallocs = irecv[4]; 1095 } 1096 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1097 info->fill_ratio_needed = 0; 1098 info->factor_mallocs = 0; 1099 info->rows_global = (double)matin->M; 1100 info->columns_global = (double)matin->N; 1101 info->rows_local = (double)matin->m; 1102 info->columns_local = (double)matin->N; 1103 1104 PetscFunctionReturn(0); 1105 } 1106 1107 #undef __FUNCT__ 1108 #define __FUNCT__ "MatSetOption_MPIAIJ" 1109 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1110 { 1111 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1112 int ierr; 1113 1114 PetscFunctionBegin; 1115 switch (op) { 1116 case MAT_NO_NEW_NONZERO_LOCATIONS: 1117 case MAT_YES_NEW_NONZERO_LOCATIONS: 1118 case MAT_COLUMNS_UNSORTED: 1119 case MAT_COLUMNS_SORTED: 1120 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1121 case MAT_KEEP_ZEROED_ROWS: 1122 case MAT_NEW_NONZERO_LOCATION_ERR: 1123 case MAT_USE_INODES: 1124 case MAT_DO_NOT_USE_INODES: 1125 case MAT_IGNORE_ZERO_ENTRIES: 1126 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1127 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1128 break; 1129 case MAT_ROW_ORIENTED: 1130 a->roworiented = PETSC_TRUE; 1131 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1132 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1133 break; 1134 case MAT_ROWS_SORTED: 1135 case MAT_ROWS_UNSORTED: 1136 case MAT_YES_NEW_DIAGONALS: 1137 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1138 break; 1139 case MAT_COLUMN_ORIENTED: 1140 a->roworiented = PETSC_FALSE; 1141 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1142 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1143 break; 1144 case MAT_IGNORE_OFF_PROC_ENTRIES: 1145 a->donotstash = PETSC_TRUE; 1146 break; 1147 case MAT_NO_NEW_DIAGONALS: 1148 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1149 default: 1150 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1151 } 1152 PetscFunctionReturn(0); 1153 } 1154 1155 #undef __FUNCT__ 1156 #define __FUNCT__ "MatGetRow_MPIAIJ" 1157 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1158 { 1159 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1160 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1161 int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1162 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1163 int *cmap,*idx_p; 1164 1165 PetscFunctionBegin; 1166 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1167 mat->getrowactive = PETSC_TRUE; 1168 1169 if (!mat->rowvalues && (idx || v)) { 1170 /* 1171 allocate enough space to hold information from the longest row. 
1172 */ 1173 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1174 int max = 1,tmp; 1175 for (i=0; i<matin->m; i++) { 1176 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1177 if (max < tmp) { max = tmp; } 1178 } 1179 ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1180 mat->rowindices = (int*)(mat->rowvalues + max); 1181 } 1182 1183 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1184 lrow = row - rstart; 1185 1186 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1187 if (!v) {pvA = 0; pvB = 0;} 1188 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1189 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1190 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1191 nztot = nzA + nzB; 1192 1193 cmap = mat->garray; 1194 if (v || idx) { 1195 if (nztot) { 1196 /* Sort by increasing column numbers, assuming A and B already sorted */ 1197 int imark = -1; 1198 if (v) { 1199 *v = v_p = mat->rowvalues; 1200 for (i=0; i<nzB; i++) { 1201 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1202 else break; 1203 } 1204 imark = i; 1205 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1206 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1207 } 1208 if (idx) { 1209 *idx = idx_p = mat->rowindices; 1210 if (imark > -1) { 1211 for (i=0; i<imark; i++) { 1212 idx_p[i] = cmap[cworkB[i]]; 1213 } 1214 } else { 1215 for (i=0; i<nzB; i++) { 1216 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1217 else break; 1218 } 1219 imark = i; 1220 } 1221 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1222 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1223 } 1224 } else { 1225 if (idx) *idx = 0; 1226 if (v) *v = 0; 1227 } 1228 } 1229 *nz = nztot; 1230 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1231 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1232 PetscFunctionReturn(0); 1233 } 1234 1235 #undef __FUNCT__ 1236 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1237 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1238 { 1239 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1240 1241 PetscFunctionBegin; 1242 if (aij->getrowactive == PETSC_FALSE) { 1243 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1244 } 1245 aij->getrowactive = PETSC_FALSE; 1246 PetscFunctionReturn(0); 1247 } 1248 1249 #undef __FUNCT__ 1250 #define __FUNCT__ "MatNorm_MPIAIJ" 1251 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1252 { 1253 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1254 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1255 int ierr,i,j,cstart = aij->cstart,shift = amat->indexshift; 1256 PetscReal sum = 0.0; 1257 PetscScalar *v; 1258 1259 PetscFunctionBegin; 1260 if (aij->size == 1) { 1261 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1262 } else { 1263 if (type == NORM_FROBENIUS) { 1264 v = amat->a; 1265 for (i=0; i<amat->nz; i++) { 1266 #if defined(PETSC_USE_COMPLEX) 1267 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1268 #else 1269 sum += (*v)*(*v); v++; 1270 #endif 1271 } 1272 v = bmat->a; 1273 for (i=0; i<bmat->nz; i++) { 1274 #if defined(PETSC_USE_COMPLEX) 1275 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1276 #else 1277 sum += (*v)*(*v); v++; 1278 #endif 1279 } 1280 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1281 *norm = sqrt(*norm); 1282 } else if (type == NORM_1) { /* max column norm */ 1283 
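      /* 1-norm = max over global columns of the sum of absolute values:
         accumulate local column sums (diagonal block offset by cstart,
         off-diagonal block mapped through garray), MPI_Allreduce the sums,
         then take the maximum entry */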
PetscReal *tmp,*tmp2; 1284 int *jj,*garray = aij->garray; 1285 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1286 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1287 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1288 *norm = 0.0; 1289 v = amat->a; jj = amat->j; 1290 for (j=0; j<amat->nz; j++) { 1291 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1292 } 1293 v = bmat->a; jj = bmat->j; 1294 for (j=0; j<bmat->nz; j++) { 1295 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1296 } 1297 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1298 for (j=0; j<mat->N; j++) { 1299 if (tmp2[j] > *norm) *norm = tmp2[j]; 1300 } 1301 ierr = PetscFree(tmp);CHKERRQ(ierr); 1302 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1303 } else if (type == NORM_INFINITY) { /* max row norm */ 1304 PetscReal ntemp = 0.0; 1305 for (j=0; j<aij->A->m; j++) { 1306 v = amat->a + amat->i[j] + shift; 1307 sum = 0.0; 1308 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1309 sum += PetscAbsScalar(*v); v++; 1310 } 1311 v = bmat->a + bmat->i[j] + shift; 1312 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1313 sum += PetscAbsScalar(*v); v++; 1314 } 1315 if (sum > ntemp) ntemp = sum; 1316 } 1317 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1318 } else { 1319 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1320 } 1321 } 1322 PetscFunctionReturn(0); 1323 } 1324 1325 #undef __FUNCT__ 1326 #define __FUNCT__ "MatTranspose_MPIAIJ" 1327 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1328 { 1329 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1330 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1331 int ierr,shift = Aloc->indexshift; 1332 int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1333 Mat B; 1334 PetscScalar *array; 1335 1336 PetscFunctionBegin; 1337 if (!matout && M != N) { 1338 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1339 } 1340 1341 ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1342 1343 /* copy over the A part */ 1344 Aloc = (Mat_SeqAIJ*)a->A->data; 1345 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1346 row = a->rstart; 1347 for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;} 1348 for (i=0; i<m; i++) { 1349 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1350 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1351 } 1352 aj = Aloc->j; 1353 for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;} 1354 1355 /* copy over the B part */ 1356 Aloc = (Mat_SeqAIJ*)a->B->data; 1357 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1358 row = a->rstart; 1359 ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr); 1360 ct = cols; 1361 for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];} 1362 for (i=0; i<m; i++) { 1363 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1364 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1365 } 1366 ierr = PetscFree(ct);CHKERRQ(ierr); 1367 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1368 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1369 if (matout) { 1370 *matout = B; 1371 } else { 1372 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1373 } 1374 PetscFunctionReturn(0); 1375 } 1376 1377 #undef __FUNCT__ 1378 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1379 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1380 { 1381 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1382 Mat a = aij->A,b 
= aij->B; 1383 int ierr,s1,s2,s3; 1384 1385 PetscFunctionBegin; 1386 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1387 if (rr) { 1388 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1389 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1390 /* Overlap communication with computation. */ 1391 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1392 } 1393 if (ll) { 1394 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1395 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1396 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1397 } 1398 /* scale the diagonal block */ 1399 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1400 1401 if (rr) { 1402 /* Do a scatter end and then right scale the off-diagonal block */ 1403 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1404 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1405 } 1406 1407 PetscFunctionReturn(0); 1408 } 1409 1410 1411 #undef __FUNCT__ 1412 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1413 int MatPrintHelp_MPIAIJ(Mat A) 1414 { 1415 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1416 int ierr; 1417 1418 PetscFunctionBegin; 1419 if (!a->rank) { 1420 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1421 } 1422 PetscFunctionReturn(0); 1423 } 1424 1425 #undef __FUNCT__ 1426 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1427 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1428 { 1429 PetscFunctionBegin; 1430 *bs = 1; 1431 PetscFunctionReturn(0); 1432 } 1433 #undef __FUNCT__ 1434 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1435 int MatSetUnfactored_MPIAIJ(Mat A) 1436 { 1437 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1438 int ierr; 1439 1440 PetscFunctionBegin; 1441 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1442 PetscFunctionReturn(0); 1443 } 1444 1445 #undef __FUNCT__ 1446 #define __FUNCT__ "MatEqual_MPIAIJ" 1447 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1448 { 1449 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1450 Mat a,b,c,d; 1451 PetscTruth flg; 1452 int ierr; 1453 1454 PetscFunctionBegin; 1455 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1456 if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type"); 1457 a = matA->A; b = matA->B; 1458 c = matB->A; d = matB->B; 1459 1460 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1461 if (flg == PETSC_TRUE) { 1462 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1463 } 1464 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1465 PetscFunctionReturn(0); 1466 } 1467 1468 #undef __FUNCT__ 1469 #define __FUNCT__ "MatCopy_MPIAIJ" 1470 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1471 { 1472 int ierr; 1473 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1474 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1475 PetscTruth flg; 1476 1477 PetscFunctionBegin; 1478 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1479 if (str != SAME_NONZERO_PATTERN || !flg) { 1480 /* because of the column compression in the off-processor part of the matrix a->B, 1481 the number of columns in a->B and b->B may be different, hence we cannot call 1482 the MatCopy() directly on the two parts. 
If need be, we can provide a more
       efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
       and then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
int MatSetUpPreallocation_MPIAIJ(Mat A)
{
  int ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
#if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatFactorInfo*,Mat*);
#endif

#include "petscblaslapack.h"
extern int MatAXPY_SeqAIJ(PetscScalar *,Mat,Mat,MatStructure);
#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str)
{
  int        ierr,one=1;
  Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
  Mat_SeqAIJ *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    x = (Mat_SeqAIJ *)xx->A->data;
    y = (Mat_SeqAIJ *)yy->A->data;
    BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* the B indices have a problem: they must be mapped back to the original (global) index!
*/ 1532 ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr); 1533 /* ierr = MatAXPY_SeqAIJ(a,xx->B,yy->B,str);CHKERRQ(ierr); */ 1534 1535 int rank,nx,ny; 1536 ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 1537 if (rank == 0) { 1538 x = (Mat_SeqAIJ *)xx->B->data; 1539 int j,row=0,*garray=xx->garray; 1540 MatGetSize(xx->B,PETSC_NULL,&nx); 1541 MatGetSize(yy->B,PETSC_NULL,&ny); 1542 printf("nx: %d, ny: %d\n",nx,ny); 1543 for (j=0; j<nx; j++){ 1544 printf("%d, ",garray[j]); 1545 } 1546 printf("\n----------- \n"); 1547 for (j=x->i[row]; j<x->i[row+1]; j++){ 1548 printf("%d, ",x->j[j]); 1549 } 1550 printf("\n----------- \n"); 1551 /* printf("[%d], xx->A: \n", rank); 1552 ierr = MatView(xx->A,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); */ 1553 printf("[%d], xx->B: \n", rank); 1554 ierr = MatView(xx->B,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 1555 printf(" -----------\n"); 1556 } 1557 1558 ierr = MatView(X,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 1559 } else { 1560 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1561 } 1562 PetscFunctionReturn(0); 1563 } 1564 1565 /* -------------------------------------------------------------------*/ 1566 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1567 MatGetRow_MPIAIJ, 1568 MatRestoreRow_MPIAIJ, 1569 MatMult_MPIAIJ, 1570 MatMultAdd_MPIAIJ, 1571 MatMultTranspose_MPIAIJ, 1572 MatMultTransposeAdd_MPIAIJ, 1573 0, 1574 0, 1575 0, 1576 0, 1577 0, 1578 0, 1579 MatRelax_MPIAIJ, 1580 MatTranspose_MPIAIJ, 1581 MatGetInfo_MPIAIJ, 1582 MatEqual_MPIAIJ, 1583 MatGetDiagonal_MPIAIJ, 1584 MatDiagonalScale_MPIAIJ, 1585 MatNorm_MPIAIJ, 1586 MatAssemblyBegin_MPIAIJ, 1587 MatAssemblyEnd_MPIAIJ, 1588 0, 1589 MatSetOption_MPIAIJ, 1590 MatZeroEntries_MPIAIJ, 1591 MatZeroRows_MPIAIJ, 1592 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1593 MatLUFactorSymbolic_MPIAIJ_TFS, 1594 #else 1595 0, 1596 #endif 1597 0, 1598 0, 1599 0, 1600 MatSetUpPreallocation_MPIAIJ, 1601 0, 1602 0, 1603 0, 1604 0, 1605 MatDuplicate_MPIAIJ, 1606 0, 1607 0, 1608 0, 1609 0, 1610 MatAXPY_MPIAIJ, 1611 MatGetSubMatrices_MPIAIJ, 1612 MatIncreaseOverlap_MPIAIJ, 1613 MatGetValues_MPIAIJ, 1614 MatCopy_MPIAIJ, 1615 MatPrintHelp_MPIAIJ, 1616 MatScale_MPIAIJ, 1617 0, 1618 0, 1619 0, 1620 MatGetBlockSize_MPIAIJ, 1621 0, 1622 0, 1623 0, 1624 0, 1625 MatFDColoringCreate_MPIAIJ, 1626 0, 1627 MatSetUnfactored_MPIAIJ, 1628 0, 1629 0, 1630 MatGetSubMatrix_MPIAIJ, 1631 MatDestroy_MPIAIJ, 1632 MatView_MPIAIJ, 1633 MatGetPetscMaps_Petsc, 1634 0, 1635 0, 1636 0, 1637 0, 1638 0, 1639 0, 1640 0, 1641 0, 1642 MatSetColoring_MPIAIJ, 1643 MatSetValuesAdic_MPIAIJ, 1644 MatSetValuesAdifor_MPIAIJ 1645 }; 1646 1647 /* ----------------------------------------------------------------------------------------*/ 1648 1649 EXTERN_C_BEGIN 1650 #undef __FUNCT__ 1651 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1652 int MatStoreValues_MPIAIJ(Mat mat) 1653 { 1654 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1655 int ierr; 1656 1657 PetscFunctionBegin; 1658 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1659 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1660 PetscFunctionReturn(0); 1661 } 1662 EXTERN_C_END 1663 1664 EXTERN_C_BEGIN 1665 #undef __FUNCT__ 1666 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1667 int MatRetrieveValues_MPIAIJ(Mat mat) 1668 { 1669 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1670 int ierr; 1671 1672 PetscFunctionBegin; 1673 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1674 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1675 PetscFunctionReturn(0); 1676 } 1677 EXTERN_C_END 1678 1679 
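/*
   Illustrative usage sketch (not part of this file's implementation): how the
   routines above are typically driven from user code after PetscInitialize().
   Each process sets values for rows it owns; entries destined for other
   processes are stashed by MatSetValues_MPIAIJ() and routed during assembly.
   The local size 4 and the unit diagonal below are placeholder choices.

     Mat         A;
     int         i,rstart,rend,ierr;
     PetscScalar one = 1.0;

     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,4,4,PETSC_DETERMINE,PETSC_DETERMINE,
                            1,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (i=rstart; i<rend; i++) {
       ierr = MatSetValues(A,1,&i,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatDestroy(A);CHKERRQ(ierr);
*/
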
#include "petscpc.h" 1680 EXTERN_C_BEGIN 1681 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *); 1682 EXTERN_C_END 1683 1684 EXTERN_C_BEGIN 1685 #undef __FUNCT__ 1686 #define __FUNCT__ "MatCreate_MPIAIJ" 1687 int MatCreate_MPIAIJ(Mat B) 1688 { 1689 Mat_MPIAIJ *b; 1690 int ierr,i,size; 1691 1692 PetscFunctionBegin; 1693 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1694 1695 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1696 B->data = (void*)b; 1697 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1698 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1699 B->factor = 0; 1700 B->assembled = PETSC_FALSE; 1701 B->mapping = 0; 1702 1703 B->insertmode = NOT_SET_VALUES; 1704 b->size = size; 1705 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1706 1707 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1708 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1709 1710 /* the information in the maps duplicates the information computed below, eventually 1711 we should remove the duplicate information that is not contained in the maps */ 1712 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1713 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1714 1715 /* build local table of row and column ownerships */ 1716 ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1717 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1718 b->cowners = b->rowners + b->size + 2; 1719 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1720 b->rowners[0] = 0; 1721 for (i=2; i<=b->size; i++) { 1722 b->rowners[i] += b->rowners[i-1]; 1723 } 1724 b->rstart = b->rowners[b->rank]; 1725 b->rend = b->rowners[b->rank+1]; 1726 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1727 b->cowners[0] = 0; 1728 for (i=2; i<=b->size; i++) { 1729 b->cowners[i] += b->cowners[i-1]; 1730 } 1731 b->cstart = b->cowners[b->rank]; 1732 b->cend = b->cowners[b->rank+1]; 1733 1734 /* build cache for off array entries formed */ 1735 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1736 b->donotstash = PETSC_FALSE; 1737 b->colmap = 0; 1738 b->garray = 0; 1739 b->roworiented = PETSC_TRUE; 1740 1741 /* stuff used for matrix vector multiply */ 1742 b->lvec = PETSC_NULL; 1743 b->Mvctx = PETSC_NULL; 1744 1745 /* stuff for MatGetRow() */ 1746 b->rowindices = 0; 1747 b->rowvalues = 0; 1748 b->getrowactive = PETSC_FALSE; 1749 1750 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1751 "MatStoreValues_MPIAIJ", 1752 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1753 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1754 "MatRetrieveValues_MPIAIJ", 1755 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1756 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1757 "MatGetDiagonalBlock_MPIAIJ", 1758 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1759 1760 PetscFunctionReturn(0); 1761 } 1762 EXTERN_C_END 1763 1764 #undef __FUNCT__ 1765 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1766 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1767 { 1768 Mat mat; 1769 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1770 int ierr; 1771 1772 PetscFunctionBegin; 1773 *newmat = 0; 1774 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1775 ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr); 
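  /*
     Aside (illustrative, with made-up arguments): MatCreate() followed by
     MatSetType(...,MATMPIAIJ), as done just above, is the same sequence an application
     uses to build an MPIAIJ matrix from scratch, normally followed by
     MatMPIAIJSetPreallocation() and MatSetValues():

       ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,&C);CHKERRQ(ierr);
       ierr = MatSetType(C,MATMPIAIJ);CHKERRQ(ierr);
       ierr = MatMPIAIJSetPreallocation(C,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);

     Here no preallocation call is needed because the duplicate is filled in directly
     from oldmat below.
  */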
1776 a = (Mat_MPIAIJ*)mat->data; 1777 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1778 mat->factor = matin->factor; 1779 mat->assembled = PETSC_TRUE; 1780 mat->insertmode = NOT_SET_VALUES; 1781 mat->preallocated = PETSC_TRUE; 1782 1783 a->rstart = oldmat->rstart; 1784 a->rend = oldmat->rend; 1785 a->cstart = oldmat->cstart; 1786 a->cend = oldmat->cend; 1787 a->size = oldmat->size; 1788 a->rank = oldmat->rank; 1789 a->donotstash = oldmat->donotstash; 1790 a->roworiented = oldmat->roworiented; 1791 a->rowindices = 0; 1792 a->rowvalues = 0; 1793 a->getrowactive = PETSC_FALSE; 1794 1795 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1796 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1797 if (oldmat->colmap) { 1798 #if defined (PETSC_USE_CTABLE) 1799 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1800 #else 1801 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1802 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1803 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1804 #endif 1805 } else a->colmap = 0; 1806 if (oldmat->garray) { 1807 int len; 1808 len = oldmat->B->n; 1809 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1810 PetscLogObjectMemory(mat,len*sizeof(int)); 1811 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1812 } else a->garray = 0; 1813 1814 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1815 PetscLogObjectParent(mat,a->lvec); 1816 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1817 PetscLogObjectParent(mat,a->Mvctx); 1818 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1819 PetscLogObjectParent(mat,a->A); 1820 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1821 PetscLogObjectParent(mat,a->B); 1822 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1823 *newmat = mat; 1824 PetscFunctionReturn(0); 1825 } 1826 1827 #include "petscsys.h" 1828 1829 EXTERN_C_BEGIN 1830 #undef __FUNCT__ 1831 #define __FUNCT__ "MatLoad_MPIAIJ" 1832 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat) 1833 { 1834 Mat A; 1835 PetscScalar *vals,*svals; 1836 MPI_Comm comm = ((PetscObject)viewer)->comm; 1837 MPI_Status status; 1838 int i,nz,ierr,j,rstart,rend,fd; 1839 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1840 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1841 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1842 #if defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_SUPERLUDIST) 1843 PetscTruth flag; 1844 #endif 1845 1846 PetscFunctionBegin; 1847 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1848 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1849 if (!rank) { 1850 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1851 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1852 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1853 if (header[3] < 0) { 1854 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 1855 } 1856 } 1857 1858 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1859 M = header[1]; N = header[2]; 1860 /* determine ownership of all rows */ 1861 m = M/size + ((M % size) > rank); 1862 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1863 ierr = 
MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1864 rowners[0] = 0; 1865 for (i=2; i<=size; i++) { 1866 rowners[i] += rowners[i-1]; 1867 } 1868 rstart = rowners[rank]; 1869 rend = rowners[rank+1]; 1870 1871 /* distribute row lengths to all processors */ 1872 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr); 1873 offlens = ourlens + (rend-rstart); 1874 if (!rank) { 1875 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 1876 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1877 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 1878 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1879 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1880 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1881 } else { 1882 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1883 } 1884 1885 if (!rank) { 1886 /* calculate the number of nonzeros on each processor */ 1887 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 1888 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 1889 for (i=0; i<size; i++) { 1890 for (j=rowners[i]; j< rowners[i+1]; j++) { 1891 procsnz[i] += rowlengths[j]; 1892 } 1893 } 1894 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1895 1896 /* determine max buffer needed and allocate it */ 1897 maxnz = 0; 1898 for (i=0; i<size; i++) { 1899 maxnz = PetscMax(maxnz,procsnz[i]); 1900 } 1901 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 1902 1903 /* read in my part of the matrix column indices */ 1904 nz = procsnz[0]; 1905 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 1906 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1907 1908 /* read in every one elses and ship off */ 1909 for (i=1; i<size; i++) { 1910 nz = procsnz[i]; 1911 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1912 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 1913 } 1914 ierr = PetscFree(cols);CHKERRQ(ierr); 1915 } else { 1916 /* determine buffer space needed for message */ 1917 nz = 0; 1918 for (i=0; i<m; i++) { 1919 nz += ourlens[i]; 1920 } 1921 ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr); 1922 1923 /* receive message of column indices*/ 1924 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 1925 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 1926 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1927 } 1928 1929 /* determine column ownership if matrix is not square */ 1930 if (N != M) { 1931 n = N/size + ((N % size) > rank); 1932 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1933 cstart = cend - n; 1934 } else { 1935 cstart = rstart; 1936 cend = rend; 1937 n = cend - cstart; 1938 } 1939 1940 /* loop over local rows, determining number of off diagonal entries */ 1941 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 1942 jj = 0; 1943 for (i=0; i<m; i++) { 1944 for (j=0; j<ourlens[i]; j++) { 1945 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 1946 jj++; 1947 } 1948 } 1949 1950 /* create our matrix */ 1951 for (i=0; i<m; i++) { 1952 ourlens[i] -= offlens[i]; 1953 } 1954 ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1955 A = *newmat; 1956 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 1957 for (i=0; i<m; i++) { 1958 ourlens[i] += offlens[i]; 1959 } 1960 1961 if (!rank) { 1962 ierr = 
PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1963 1964 /* read in my part of the matrix numerical values */ 1965 nz = procsnz[0]; 1966 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1967 1968 /* insert into matrix */ 1969 jj = rstart; 1970 smycols = mycols; 1971 svals = vals; 1972 for (i=0; i<m; i++) { 1973 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1974 smycols += ourlens[i]; 1975 svals += ourlens[i]; 1976 jj++; 1977 } 1978 1979 /* read in other processors and ship out */ 1980 for (i=1; i<size; i++) { 1981 nz = procsnz[i]; 1982 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1983 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 1984 } 1985 ierr = PetscFree(procsnz);CHKERRQ(ierr); 1986 } else { 1987 /* receive numeric values */ 1988 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1989 1990 /* receive message of values*/ 1991 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 1992 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1993 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1994 1995 /* insert into matrix */ 1996 jj = rstart; 1997 smycols = mycols; 1998 svals = vals; 1999 for (i=0; i<m; i++) { 2000 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2001 smycols += ourlens[i]; 2002 svals += ourlens[i]; 2003 jj++; 2004 } 2005 } 2006 ierr = PetscFree(ourlens);CHKERRQ(ierr); 2007 ierr = PetscFree(vals);CHKERRQ(ierr); 2008 ierr = PetscFree(mycols);CHKERRQ(ierr); 2009 ierr = PetscFree(rowners);CHKERRQ(ierr); 2010 2011 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2012 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2013 #if defined(PETSC_HAVE_SPOOLES) 2014 ierr = PetscOptionsHasName(A->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr); 2015 if (flag) { 2016 if (size == 1) { 2017 ierr = MatUseSpooles_SeqAIJ(A);CHKERRQ(ierr); 2018 } else { 2019 ierr = MatUseSpooles_MPIAIJ(A);CHKERRQ(ierr); 2020 } 2021 } 2022 #endif 2023 #if defined(PETSC_HAVE_SUPERLUDIST) 2024 ierr = PetscOptionsHasName(A->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr); 2025 if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(A);CHKERRQ(ierr); } 2026 #endif 2027 PetscFunctionReturn(0); 2028 } 2029 EXTERN_C_END 2030 2031 #undef __FUNCT__ 2032 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 2033 /* 2034 Not great since it makes two copies of the submatrix, first an SeqAIJ 2035 in local and then by concatenating the local matrices the end result. 
2036 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2037 */ 2038 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 2039 { 2040 int ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j; 2041 int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 2042 Mat *local,M,Mreuse; 2043 PetscScalar *vwork,*aa; 2044 MPI_Comm comm = mat->comm; 2045 Mat_SeqAIJ *aij; 2046 2047 2048 PetscFunctionBegin; 2049 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2050 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2051 2052 if (call == MAT_REUSE_MATRIX) { 2053 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2054 if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse"); 2055 local = &Mreuse; 2056 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2057 } else { 2058 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2059 Mreuse = *local; 2060 ierr = PetscFree(local);CHKERRQ(ierr); 2061 } 2062 2063 /* 2064 m - number of local rows 2065 n - number of columns (same on all processors) 2066 rstart - first row in new global matrix generated 2067 */ 2068 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2069 if (call == MAT_INITIAL_MATRIX) { 2070 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2071 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix"); 2072 ii = aij->i; 2073 jj = aij->j; 2074 2075 /* 2076 Determine the number of non-zeros in the diagonal and off-diagonal 2077 portions of the matrix in order to do correct preallocation 2078 */ 2079 2080 /* first get start and end of "diagonal" columns */ 2081 if (csize == PETSC_DECIDE) { 2082 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2083 if (mglobal == n) { /* square matrix */ 2084 nlocal = m; 2085 } else { 2086 nlocal = n/size + ((n % size) > rank); 2087 } 2088 } else { 2089 nlocal = csize; 2090 } 2091 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2092 rstart = rend - nlocal; 2093 if (rank == size - 1 && rend != n) { 2094 SETERRQ(1,"Local column sizes do not add up to total number of columns"); 2095 } 2096 2097 /* next, compute all the lengths */ 2098 ierr = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2099 olens = dlens + m; 2100 for (i=0; i<m; i++) { 2101 jend = ii[i+1] - ii[i]; 2102 olen = 0; 2103 dlen = 0; 2104 for (j=0; j<jend; j++) { 2105 if (*jj < rstart || *jj >= rend) olen++; 2106 else dlen++; 2107 jj++; 2108 } 2109 olens[i] = olen; 2110 dlens[i] = dlen; 2111 } 2112 ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 2113 ierr = PetscFree(dlens);CHKERRQ(ierr); 2114 } else { 2115 int ml,nl; 2116 2117 M = *newmat; 2118 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2119 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2120 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2121 /* 2122 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2123 rather than the slower MatSetValues(). 
2124 */ 2125 M->was_assembled = PETSC_TRUE; 2126 M->assembled = PETSC_FALSE; 2127 } 2128 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2129 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2130 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix"); 2131 ii = aij->i; 2132 jj = aij->j; 2133 aa = aij->a; 2134 for (i=0; i<m; i++) { 2135 row = rstart + i; 2136 nz = ii[i+1] - ii[i]; 2137 cwork = jj; jj += nz; 2138 vwork = aa; aa += nz; 2139 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2140 } 2141 2142 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2143 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2144 *newmat = M; 2145 2146 /* save submatrix used in processor for next request */ 2147 if (call == MAT_INITIAL_MATRIX) { 2148 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2149 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2150 } 2151 2152 PetscFunctionReturn(0); 2153 } 2154 2155 #undef __FUNCT__ 2156 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2157 /*@C 2158 MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format 2159 (the default parallel PETSc format). For good matrix assembly performance 2160 the user should preallocate the matrix storage by setting the parameters 2161 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2162 performance can be increased by more than a factor of 50. 2163 2164 Collective on MPI_Comm 2165 2166 Input Parameters: 2167 + A - the matrix 2168 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2169 (same value is used for all local rows) 2170 . d_nnz - array containing the number of nonzeros in the various rows of the 2171 DIAGONAL portion of the local submatrix (possibly different for each row) 2172 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2173 The size of this array is equal to the number of local rows, i.e 'm'. 2174 You must leave room for the diagonal entry even if it is zero. 2175 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2176 submatrix (same value is used for all local rows). 2177 - o_nnz - array containing the number of nonzeros in the various rows of the 2178 OFF-DIAGONAL portion of the local submatrix (possibly different for 2179 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2180 structure. The size of this array is equal to the number 2181 of local rows, i.e 'm'. 2182 2183 The AIJ format (also called the Yale sparse matrix format or 2184 compressed row storage), is fully compatible with standard Fortran 77 2185 storage. That is, the stored row and column indices can begin at 2186 either one (as in Fortran) or zero. See the users manual for details. 2187 2188 The user MUST specify either the local or global matrix dimensions 2189 (possibly both). 2190 2191 The parallel matrix is partitioned such that the first m0 rows belong to 2192 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2193 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2194 2195 The DIAGONAL portion of the local submatrix of a processor can be defined 2196 as the submatrix which is obtained by extraction the part corresponding 2197 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2198 first row that belongs to the processor, and r2 is the last row belonging 2199 to the this processor. This is a square mxm matrix. 
The remaining portion 2200 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2201 2202 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2203 2204 By default, this format uses inodes (identical nodes) when possible. 2205 We search for consecutive rows with the same nonzero structure, thereby 2206 reusing matrix information to achieve increased efficiency. 2207 2208 Options Database Keys: 2209 + -mat_aij_no_inode - Do not use inodes 2210 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2211 - -mat_aij_oneindex - Internally use indexing starting at 1 2212 rather than 0. Note that when calling MatSetValues(), 2213 the user still MUST index entries starting at 0! 2214 2215 Example usage: 2216 2217 Consider the following 8x8 matrix with 34 non-zero values, that is 2218 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2219 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2220 as follows: 2221 2222 .vb 2223 1 2 0 | 0 3 0 | 0 4 2224 Proc0 0 5 6 | 7 0 0 | 8 0 2225 9 0 10 | 11 0 0 | 12 0 2226 ------------------------------------- 2227 13 0 14 | 15 16 17 | 0 0 2228 Proc1 0 18 0 | 19 20 21 | 0 0 2229 0 0 0 | 22 23 0 | 24 0 2230 ------------------------------------- 2231 Proc2 25 26 27 | 0 0 28 | 29 0 2232 30 0 0 | 31 32 33 | 0 34 2233 .ve 2234 2235 This can be represented as a collection of submatrices as: 2236 2237 .vb 2238 A B C 2239 D E F 2240 G H I 2241 .ve 2242 2243 Where the submatrices A,B,C are owned by proc0, D,E,F are 2244 owned by proc1, G,H,I are owned by proc2. 2245 2246 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2247 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2248 The 'M','N' parameters are 8,8, and have the same values on all procs. 2249 2250 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2251 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2252 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2253 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2254 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2255 matrix, ans [DF] as another SeqAIJ matrix. 2256 2257 When d_nz, o_nz parameters are specified, d_nz storage elements are 2258 allocated for every row of the local diagonal submatrix, and o_nz 2259 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2260 One way to choose d_nz and o_nz is to use the max nonzerors per local 2261 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2262 In this case, the values of d_nz,o_nz are: 2263 .vb 2264 proc0 : dnz = 2, o_nz = 2 2265 proc1 : dnz = 3, o_nz = 2 2266 proc2 : dnz = 1, o_nz = 4 2267 .ve 2268 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2269 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2270 for proc3. i.e we are using 12+15+10=37 storage locations to store 2271 34 values. 2272 2273 When d_nnz, o_nnz parameters are specified, the storage is specified 2274 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2275 In the above case the values for d_nnz,o_nnz are: 2276 .vb 2277 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2278 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2279 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2280 .ve 2281 Here the space allocated is sum of all the above values i.e 34, and 2282 hence pre-allocation is perfect. 
2283 2284 Level: intermediate 2285 2286 .keywords: matrix, aij, compressed row, sparse, parallel 2287 2288 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2289 @*/ 2290 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 2291 { 2292 Mat_MPIAIJ *b; 2293 int ierr,i; 2294 PetscTruth flg2; 2295 2296 PetscFunctionBegin; 2297 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr); 2298 if (!flg2) PetscFunctionReturn(0); 2299 B->preallocated = PETSC_TRUE; 2300 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2301 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2302 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 2303 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 2304 if (d_nnz) { 2305 for (i=0; i<B->m; i++) { 2306 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 2307 } 2308 } 2309 if (o_nnz) { 2310 for (i=0; i<B->m; i++) { 2311 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 2312 } 2313 } 2314 b = (Mat_MPIAIJ*)B->data; 2315 2316 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2317 PetscLogObjectParent(B,b->A); 2318 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2319 PetscLogObjectParent(B,b->B); 2320 2321 PetscFunctionReturn(0); 2322 } 2323 2324 #undef __FUNCT__ 2325 #define __FUNCT__ "MatCreateMPIAIJ" 2326 /*@C 2327 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2328 (the default parallel PETSc format). For good matrix assembly performance 2329 the user should preallocate the matrix storage by setting the parameters 2330 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2331 performance can be increased by more than a factor of 50. 2332 2333 Collective on MPI_Comm 2334 2335 Input Parameters: 2336 + comm - MPI communicator 2337 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2338 This value should be the same as the local size used in creating the 2339 y vector for the matrix-vector product y = Ax. 2340 . n - This value should be the same as the local size used in creating the 2341 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2342 calculated if N is given) For square matrices n is almost always m. 2343 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2344 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2345 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2346 (same value is used for all local rows) 2347 . d_nnz - array containing the number of nonzeros in the various rows of the 2348 DIAGONAL portion of the local submatrix (possibly different for each row) 2349 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2350 The size of this array is equal to the number of local rows, i.e 'm'. 2351 You must leave room for the diagonal entry even if it is zero. 2352 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2353 submatrix (same value is used for all local rows). 2354 - o_nnz - array containing the number of nonzeros in the various rows of the 2355 OFF-DIAGONAL portion of the local submatrix (possibly different for 2356 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2357 structure. 
The size of this array is equal to the number 2358 of local rows, i.e 'm'. 2359 2360 Output Parameter: 2361 . A - the matrix 2362 2363 Notes: 2364 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2365 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2366 storage requirements for this matrix. 2367 2368 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2369 processor than it must be used on all processors that share the object for 2370 that argument. 2371 2372 The AIJ format (also called the Yale sparse matrix format or 2373 compressed row storage), is fully compatible with standard Fortran 77 2374 storage. That is, the stored row and column indices can begin at 2375 either one (as in Fortran) or zero. See the users manual for details. 2376 2377 The user MUST specify either the local or global matrix dimensions 2378 (possibly both). 2379 2380 The parallel matrix is partitioned such that the first m0 rows belong to 2381 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2382 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2383 2384 The DIAGONAL portion of the local submatrix of a processor can be defined 2385 as the submatrix which is obtained by extraction the part corresponding 2386 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2387 first row that belongs to the processor, and r2 is the last row belonging 2388 to the this processor. This is a square mxm matrix. The remaining portion 2389 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2390 2391 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2392 2393 By default, this format uses inodes (identical nodes) when possible. 2394 We search for consecutive rows with the same nonzero structure, thereby 2395 reusing matrix information to achieve increased efficiency. 2396 2397 Options Database Keys: 2398 + -mat_aij_no_inode - Do not use inodes 2399 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2400 - -mat_aij_oneindex - Internally use indexing starting at 1 2401 rather than 0. Note that when calling MatSetValues(), 2402 the user still MUST index entries starting at 0! 2403 2404 2405 Example usage: 2406 2407 Consider the following 8x8 matrix with 34 non-zero values, that is 2408 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2409 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2410 as follows: 2411 2412 .vb 2413 1 2 0 | 0 3 0 | 0 4 2414 Proc0 0 5 6 | 7 0 0 | 8 0 2415 9 0 10 | 11 0 0 | 12 0 2416 ------------------------------------- 2417 13 0 14 | 15 16 17 | 0 0 2418 Proc1 0 18 0 | 19 20 21 | 0 0 2419 0 0 0 | 22 23 0 | 24 0 2420 ------------------------------------- 2421 Proc2 25 26 27 | 0 0 28 | 29 0 2422 30 0 0 | 31 32 33 | 0 34 2423 .ve 2424 2425 This can be represented as a collection of submatrices as: 2426 2427 .vb 2428 A B C 2429 D E F 2430 G H I 2431 .ve 2432 2433 Where the submatrices A,B,C are owned by proc0, D,E,F are 2434 owned by proc1, G,H,I are owned by proc2. 2435 2436 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2437 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2438 The 'M','N' parameters are 8,8, and have the same values on all procs. 2439 2440 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2441 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2442 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
2443 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2444 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2445 matrix, ans [DF] as another SeqAIJ matrix. 2446 2447 When d_nz, o_nz parameters are specified, d_nz storage elements are 2448 allocated for every row of the local diagonal submatrix, and o_nz 2449 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2450 One way to choose d_nz and o_nz is to use the max nonzerors per local 2451 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2452 In this case, the values of d_nz,o_nz are: 2453 .vb 2454 proc0 : dnz = 2, o_nz = 2 2455 proc1 : dnz = 3, o_nz = 2 2456 proc2 : dnz = 1, o_nz = 4 2457 .ve 2458 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2459 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2460 for proc3. i.e we are using 12+15+10=37 storage locations to store 2461 34 values. 2462 2463 When d_nnz, o_nnz parameters are specified, the storage is specified 2464 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2465 In the above case the values for d_nnz,o_nnz are: 2466 .vb 2467 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2468 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2469 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2470 .ve 2471 Here the space allocated is sum of all the above values i.e 34, and 2472 hence pre-allocation is perfect. 2473 2474 Level: intermediate 2475 2476 .keywords: matrix, aij, compressed row, sparse, parallel 2477 2478 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2479 @*/ 2480 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 2481 { 2482 int ierr,size; 2483 2484 PetscFunctionBegin; 2485 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2486 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2487 if (size > 1) { 2488 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2489 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2490 } else { 2491 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2492 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2493 } 2494 PetscFunctionReturn(0); 2495 } 2496 2497 #undef __FUNCT__ 2498 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2499 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap) 2500 { 2501 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2502 PetscFunctionBegin; 2503 *Ad = a->A; 2504 *Ao = a->B; 2505 *colmap = a->garray; 2506 PetscFunctionReturn(0); 2507 } 2508 2509 #undef __FUNCT__ 2510 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2511 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2512 { 2513 int ierr,i; 2514 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2515 2516 PetscFunctionBegin; 2517 if (coloring->ctype == IS_COLORING_LOCAL) { 2518 ISColoringValue *allcolors,*colors; 2519 ISColoring ocoloring; 2520 2521 /* set coloring for diagonal portion */ 2522 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2523 2524 /* set coloring for off-diagonal portion */ 2525 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2526 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2527 for (i=0; i<a->B->n; i++) { 2528 colors[i] = allcolors[a->garray[i]]; 2529 } 2530 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2531 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2532 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2533 ierr = 
ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2534 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2535 ISColoringValue *colors; 2536 int *larray; 2537 ISColoring ocoloring; 2538 2539 /* set coloring for diagonal portion */ 2540 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2541 for (i=0; i<a->A->n; i++) { 2542 larray[i] = i + a->cstart; 2543 } 2544 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2545 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2546 for (i=0; i<a->A->n; i++) { 2547 colors[i] = coloring->colors[larray[i]]; 2548 } 2549 ierr = PetscFree(larray);CHKERRQ(ierr); 2550 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2551 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2552 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2553 2554 /* set coloring for off-diagonal portion */ 2555 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2556 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2557 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2558 for (i=0; i<a->B->n; i++) { 2559 colors[i] = coloring->colors[larray[i]]; 2560 } 2561 ierr = PetscFree(larray);CHKERRQ(ierr); 2562 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2563 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2564 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2565 } else { 2566 SETERRQ1(1,"No support ISColoringType %d",coloring->ctype); 2567 } 2568 2569 PetscFunctionReturn(0); 2570 } 2571 2572 #undef __FUNCT__ 2573 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2574 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2575 { 2576 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2577 int ierr; 2578 2579 PetscFunctionBegin; 2580 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2581 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2582 PetscFunctionReturn(0); 2583 } 2584 2585 #undef __FUNCT__ 2586 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2587 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues) 2588 { 2589 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2590 int ierr; 2591 2592 PetscFunctionBegin; 2593 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2594 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2595 PetscFunctionReturn(0); 2596 } 2597 2598 #undef __FUNCT__ 2599 #define __FUNCT__ "MatMerge" 2600 /*@C 2601 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2602 matrices from each processor 2603 2604 Collective on MPI_Comm 2605 2606 Input Parameters: 2607 + comm - the communicators the parallel matrix will live on 2608 - inmat - the input sequential matrices 2609 2610 Output Parameter: 2611 . outmat - the parallel matrix generated 2612 2613 Level: advanced 2614 2615 Notes: The number of columns of the matrix in EACH of the seperate files 2616 MUST be the same. 
2617 2618 @*/ 2619 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat) 2620 { 2621 int ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz; 2622 PetscScalar *values; 2623 PetscMap columnmap,rowmap; 2624 2625 PetscFunctionBegin; 2626 2627 ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr); 2628 2629 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2630 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2631 ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr); 2632 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2633 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2634 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2635 2636 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2637 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2638 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2639 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2640 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2641 2642 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2643 for (i=0;i<m;i++) { 2644 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2645 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2646 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2647 } 2648 ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr); 2649 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2650 2651 for (i=0;i<m;i++) { 2652 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2653 I = i + rstart; 2654 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2655 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2656 } 2657 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2658 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2659 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2660 2661 PetscFunctionReturn(0); 2662 } 2663 2664 #undef __FUNCT__ 2665 #define __FUNCT__ "MatFileSplit" 2666 int MatFileSplit(Mat A,char *outfile) 2667 { 2668 int ierr,rank,len,m,N,i,rstart,*indx,nnz; 2669 PetscViewer out; 2670 char *name; 2671 Mat B; 2672 PetscScalar *values; 2673 2674 PetscFunctionBegin; 2675 2676 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2677 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2678 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr); 2679 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2680 for (i=0;i<m;i++) { 2681 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2682 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2683 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2684 } 2685 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2686 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2687 2688 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2689 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2690 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2691 sprintf(name,"%s.%d",outfile,rank); 2692 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr); 2693 ierr = PetscFree(name); 2694 ierr = MatView(B,out);CHKERRQ(ierr); 2695 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2696 ierr = MatDestroy(B);CHKERRQ(ierr); 2697 PetscFunctionReturn(0); 2698 } 2699
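
/*
   Illustrative sketch (not part of the library): one way MatMerge() above might be used.
   Each process builds its own sequential AIJ matrix (all pieces must have the same
   number of columns) and MatMerge() stacks their rows, in rank order, into one parallel
   MPIAIJ matrix; note that MatMerge() destroys the input matrix.  The routine name,
   sizes, and values below are made up for the example.

     #include "petscmat.h"

     int BuildParallelFromLocalPieces(MPI_Comm comm,Mat *C)
     {
       int         ierr,i,cols[3];
       PetscScalar vals[3];
       Mat         local;

       ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,4,12,3,PETSC_NULL,&local);CHKERRQ(ierr);
       for (i=0; i<4; i++) {
         cols[0] = 3*i; cols[1] = 3*i+1; cols[2] = 3*i+2;
         vals[0] = 1.0; vals[1] = 2.0;   vals[2] = 3.0;
         ierr = MatSetValues(local,1,&i,3,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
       }
       ierr = MatAssemblyBegin(local,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(local,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

       ierr = MatMerge(comm,local,C);CHKERRQ(ierr);
       return 0;
     }
*/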