/*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
EXTERN int DisAssemble_MPIAIJ(Mat);
EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatPrintHelp_SeqAIJ(Mat);

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is defined this mapping is
  scalable, at a slightly higher hash-table lookup cost; without it the
  mapping is not scalable (each processor holds an integer array of
  length N), but access is fast.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        n = aij->B->n,i,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int         new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if \
(!a->singlemalloc) { \ 94 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 95 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 96 } \ 97 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 98 a->singlemalloc = PETSC_TRUE; \ 99 \ 100 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 101 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 102 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 103 a->maxnz += CHUNKSIZE; \ 104 a->reallocs++; \ 105 } \ 106 N = nrow++ - 1; a->nz++; \ 107 /* shift up all the later entries in this row */ \ 108 for (ii=N; ii>=_i; ii--) { \ 109 rp[ii+1] = rp[ii]; \ 110 ap[ii+1] = ap[ii]; \ 111 } \ 112 rp[_i] = col1; \ 113 ap[_i] = value; \ 114 a_noinsert: ; \ 115 ailen[row] = nrow; \ 116 } 117 118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 119 { \ 120 \ 121 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 122 rmax = bimax[row]; nrow = bilen[row]; \ 123 col1 = col - shift; \ 124 \ 125 low = 0; high = nrow; \ 126 while (high-low > 5) { \ 127 t = (low+high)/2; \ 128 if (rp[t] > col) high = t; \ 129 else low = t; \ 130 } \ 131 for (_i=low; _i<high; _i++) { \ 132 if (rp[_i] > col1) break; \ 133 if (rp[_i] == col1) { \ 134 if (addv == ADD_VALUES) ap[_i] += value; \ 135 else ap[_i] = value; \ 136 goto b_noinsert; \ 137 } \ 138 } \ 139 if (nonew == 1) goto b_noinsert; \ 140 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 141 if (nrow >= rmax) { \ 142 /* there is no extra room in row, therefore enlarge */ \ 143 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 144 PetscScalar *new_a; \ 145 \ 146 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 147 \ 148 /* malloc new storage space */ \ 149 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \ 150 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 151 new_j = (int*)(new_a + new_nz); \ 152 new_i = new_j + new_nz; \ 153 \ 154 /* copy over old data into new slots */ \ 155 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 156 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 157 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 158 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 159 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 160 len*sizeof(int));CHKERRQ(ierr); \ 161 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 162 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 163 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 164 /* free up old matrix storage */ \ 165 \ 166 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 167 if (!b->singlemalloc) { \ 168 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 169 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 170 } \ 171 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 172 b->singlemalloc = PETSC_TRUE; \ 173 \ 174 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 175 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 176 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 177 b->maxnz += CHUNKSIZE; \ 178 b->reallocs++; \ 179 } \ 180 N = nrow++ - 1; b->nz++; \ 181 /* shift up all the later entries in this row */ \ 182 for (ii=N; ii>=_i; ii--) { \ 183 rp[ii+1] = rp[ii]; \ 184 ap[ii+1] = ap[ii]; \ 185 } \ 186 rp[_i] = col1; \ 187 ap[_i] = value; \ 188 b_noinsert: ; \ 189 bilen[row] = nrow; \ 190 } 191 192 #undef __FUNCT__ 193 #define __FUNCT__ "MatSetValues_MPIAIJ" 194 int MatSetValues_MPIAIJ(Mat 
mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 195 { 196 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 197 PetscScalar value; 198 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 199 int cstart = aij->cstart,cend = aij->cend,row,col; 200 PetscTruth roworiented = aij->roworiented; 201 202 /* Some Variables required in the macro */ 203 Mat A = aij->A; 204 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 205 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 206 PetscScalar *aa = a->a; 207 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 208 Mat B = aij->B; 209 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 210 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 211 PetscScalar *ba = b->a; 212 213 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 214 int nonew = a->nonew,shift = a->indexshift; 215 PetscScalar *ap; 216 217 PetscFunctionBegin; 218 for (i=0; i<m; i++) { 219 if (im[i] < 0) continue; 220 #if defined(PETSC_USE_BOPT_g) 221 if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 222 #endif 223 if (im[i] >= rstart && im[i] < rend) { 224 row = im[i] - rstart; 225 for (j=0; j<n; j++) { 226 if (in[j] >= cstart && in[j] < cend){ 227 col = in[j] - cstart; 228 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 229 if (ignorezeroentries && value == 0.0) continue; 230 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 231 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 232 } else if (in[j] < 0) continue; 233 #if defined(PETSC_USE_BOPT_g) 234 else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");} 235 #endif 236 else { 237 if (mat->was_assembled) { 238 if (!aij->colmap) { 239 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 240 } 241 #if defined (PETSC_USE_CTABLE) 242 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 243 col--; 244 #else 245 col = aij->colmap[in[j]] - 1; 246 #endif 247 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 248 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 249 col = in[j]; 250 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 251 B = aij->B; 252 b = (Mat_SeqAIJ*)B->data; 253 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 254 ba = b->a; 255 } 256 } else col = in[j]; 257 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 258 if (ignorezeroentries && value == 0.0) continue; 259 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 260 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 261 } 262 } 263 } else { 264 if (!aij->donotstash) { 265 if (roworiented) { 266 if (ignorezeroentries && v[i*n] == 0.0) continue; 267 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 268 } else { 269 if (ignorezeroentries && v[i] == 0.0) continue; 270 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 271 } 272 } 273 } 274 } 275 PetscFunctionReturn(0); 276 } 277 278 #undef __FUNCT__ 279 #define __FUNCT__ "MatGetValues_MPIAIJ" 280 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v) 281 { 282 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 283 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 284 int cstart = aij->cstart,cend = aij->cend,row,col; 285 286 PetscFunctionBegin; 287 for (i=0; i<m; i++) { 288 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 289 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr,nstash,reallocs;
  InsertMode addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  int         i,j,rstart,ncols,n,ierr,flg;
  int         *row,*col,other_disassembled;
  PetscScalar *val;
  InsertMode  addv = mat->insertmode;
#if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES)
  PetscTruth  flag;
#endif

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble.
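     For context, the caller-side sequence that drives the stash exchange
     handled above is typically (a minimal sketch only; comm, m, n, M, N,
     row, col, val and the preallocation counts below are placeholders, not
     values taken from this file):

       Mat C;
       ierr = MatCreateMPIAIJ(comm,m,n,M,N,5,PETSC_NULL,2,PETSC_NULL,&C);CHKERRQ(ierr);
       ierr = MatSetValues(C,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr);  possibly an off-processor row
       ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);         ships stashed entries
       ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);           receives and inserts them here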
*/ 385 /* 386 if nonzero structure of submatrix B cannot change then we know that 387 no processor disassembled thus we can skip this stuff 388 */ 389 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 390 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 391 if (mat->was_assembled && !other_disassembled) { 392 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 393 } 394 } 395 396 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 397 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 398 } 399 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 400 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 401 402 if (aij->rowvalues) { 403 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 404 aij->rowvalues = 0; 405 } 406 #if defined(PETSC_HAVE_SUPERLUDIST) 407 ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr); 408 if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); } 409 #endif 410 411 #if defined(PETSC_HAVE_SPOOLES) 412 ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr); 413 if (flag) { ierr = MatUseSpooles_MPIAIJ(mat);CHKERRQ(ierr); } 414 #endif 415 PetscFunctionReturn(0); 416 } 417 418 #undef __FUNCT__ 419 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 420 int MatZeroEntries_MPIAIJ(Mat A) 421 { 422 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 423 int ierr; 424 425 PetscFunctionBegin; 426 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 427 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 428 PetscFunctionReturn(0); 429 } 430 431 #undef __FUNCT__ 432 #define __FUNCT__ "MatZeroRows_MPIAIJ" 433 int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag) 434 { 435 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 436 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 437 int *nprocs,j,idx,nsends,row; 438 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 439 int *rvalues,tag = A->tag,count,base,slen,n,*source; 440 int *lens,imdex,*lrows,*values,rstart=l->rstart; 441 MPI_Comm comm = A->comm; 442 MPI_Request *send_waits,*recv_waits; 443 MPI_Status recv_status,*send_status; 444 IS istmp; 445 PetscTruth found; 446 447 PetscFunctionBegin; 448 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 449 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 450 451 /* first count number of contributors to each processor */ 452 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 453 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 454 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 455 for (i=0; i<N; i++) { 456 idx = rows[i]; 457 found = PETSC_FALSE; 458 for (j=0; j<size; j++) { 459 if (idx >= owners[j] && idx < owners[j+1]) { 460 nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break; 461 } 462 } 463 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 464 } 465 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 466 467 /* inform other processors of number of messages and max length*/ 468 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 469 470 /* post receives: */ 471 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 472 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 473 for (i=0; i<nrecvs; i++) { 474 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 475 } 476 477 /* do sends: 478 1) starts[i] gives the starting index in svalues for stuff going to 479 the ith processor 480 */ 481 ierr = 
PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* convert the received global row numbers into local row numbers */
  ierr  = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called once per diagonal entry,
     which would allocate new memory many times and slow things down.
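     A caller-side sketch of how this routine is reached (hedged example;
     A is a parallel MPIAIJ matrix, and nrows and rows[] are placeholders
     holding global row numbers, which need not be owned by the calling
     process):

       IS          zrows;
       PetscScalar one = 1.0;
       ierr = ISCreateGeneral(PETSC_COMM_SELF,nrows,rows,&zrows);CHKERRQ(ierr);
       ierr = MatZeroRows(A,zrows,&one);CHKERRQ(ierr);   zeroed rows get 1.0 on the diagonal
       ierr = ISDestroy(zrows);CHKERRQ(ierr);

     Passing a null pointer instead of &one leaves the zeroed rows completely
     empty.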
541 542 Contributed by: Mathew Knepley 543 */ 544 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 545 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 546 if (diag && (l->A->M == l->A->N)) { 547 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 548 } else if (diag) { 549 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 550 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 551 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 552 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 553 } 554 for (i = 0; i < slen; i++) { 555 row = lrows[i] + rstart; 556 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 557 } 558 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 559 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 560 } else { 561 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 562 } 563 ierr = ISDestroy(istmp);CHKERRQ(ierr); 564 ierr = PetscFree(lrows);CHKERRQ(ierr); 565 566 /* wait on sends */ 567 if (nsends) { 568 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 569 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 570 ierr = PetscFree(send_status);CHKERRQ(ierr); 571 } 572 ierr = PetscFree(send_waits);CHKERRQ(ierr); 573 ierr = PetscFree(svalues);CHKERRQ(ierr); 574 575 PetscFunctionReturn(0); 576 } 577 578 #undef __FUNCT__ 579 #define __FUNCT__ "MatMult_MPIAIJ" 580 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 581 { 582 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 583 int ierr,nt; 584 585 PetscFunctionBegin; 586 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 587 if (nt != A->n) { 588 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt); 589 } 590 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 591 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 592 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 593 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 594 PetscFunctionReturn(0); 595 } 596 597 #undef __FUNCT__ 598 #define __FUNCT__ "MatMultAdd_MPIAIJ" 599 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 600 { 601 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 602 int ierr; 603 604 PetscFunctionBegin; 605 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 606 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 607 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 608 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 609 PetscFunctionReturn(0); 610 } 611 612 #undef __FUNCT__ 613 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 614 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 615 { 616 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 617 int ierr; 618 619 PetscFunctionBegin; 620 /* do nondiagonal part */ 621 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 622 /* send it on its way */ 623 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 624 /* do local part */ 625 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 626 /* receive remote parts: note this assumes the values are not actually */ 627 /* inserted in yy until the next line, which is true for my implementation*/ 628 /* but is not perhaps always true. 
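      (The same communication/computation overlap idiom appears throughout
      this file; schematically, with placeholder names rather than the
      variables of this routine:

         ierr = VecScatterBegin(x,lvec,INSERT_VALUES,SCATTER_FORWARD,ctx);CHKERRQ(ierr);
         ... work that does not need lvec ...
         ierr = VecScatterEnd(x,lvec,INSERT_VALUES,SCATTER_FORWARD,ctx);CHKERRQ(ierr);
         ... work that uses the fully gathered lvec ...

      so the message traffic proceeds while the local part is computed.)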
*/ 629 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 630 PetscFunctionReturn(0); 631 } 632 633 #undef __FUNCT__ 634 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 635 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 636 { 637 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 638 int ierr; 639 640 PetscFunctionBegin; 641 /* do nondiagonal part */ 642 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 643 /* send it on its way */ 644 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 645 /* do local part */ 646 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 647 /* receive remote parts: note this assumes the values are not actually */ 648 /* inserted in yy until the next line, which is true for my implementation*/ 649 /* but is not perhaps always true. */ 650 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 651 PetscFunctionReturn(0); 652 } 653 654 /* 655 This only works correctly for square matrices where the subblock A->A is the 656 diagonal block 657 */ 658 #undef __FUNCT__ 659 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 660 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 661 { 662 int ierr; 663 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 664 665 PetscFunctionBegin; 666 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 667 if (a->rstart != a->cstart || a->rend != a->cend) { 668 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 669 } 670 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 671 PetscFunctionReturn(0); 672 } 673 674 #undef __FUNCT__ 675 #define __FUNCT__ "MatScale_MPIAIJ" 676 int MatScale_MPIAIJ(PetscScalar *aa,Mat A) 677 { 678 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 679 int ierr; 680 681 PetscFunctionBegin; 682 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 683 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 684 PetscFunctionReturn(0); 685 } 686 687 #undef __FUNCT__ 688 #define __FUNCT__ "MatDestroy_MPIAIJ" 689 int MatDestroy_MPIAIJ(Mat mat) 690 { 691 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 692 int ierr; 693 694 PetscFunctionBegin; 695 #if defined(PETSC_USE_LOG) 696 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 697 #endif 698 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 699 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 700 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 701 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 702 #if defined (PETSC_USE_CTABLE) 703 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 704 #else 705 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 706 #endif 707 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 708 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 709 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 710 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 711 ierr = PetscFree(aij);CHKERRQ(ierr); 712 PetscFunctionReturn(0); 713 } 714 715 extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer); 716 extern int MatFactorInfo_Spooles(Mat,PetscViewer); 717 718 #undef __FUNCT__ 719 #define __FUNCT__ "MatView_MPIAIJ_Binary" 720 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 721 { 722 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 723 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 724 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 725 int nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag; 726 int 
      nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
  PetscScalar *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (rank == 0) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->M;
    header[2] = mat->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen = mat->m;
    ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr  = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
column_values[cnt++] = A->a[k]; 816 } 817 for (; j<B->i[i+1]; j++) { 818 column_values[cnt++] = B->a[j]; 819 } 820 } 821 if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz); 822 823 /* store the column values to the file */ 824 if (rank == 0) { 825 MPI_Status status; 826 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr); 827 for (i=1; i<size; i++) { 828 ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr); 829 if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax); 830 ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr); 831 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr); 832 } 833 } else { 834 ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr); 835 ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr); 836 } 837 ierr = PetscFree(column_values);CHKERRQ(ierr); 838 PetscFunctionReturn(0); 839 } 840 841 #undef __FUNCT__ 842 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 843 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 844 { 845 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 846 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 847 int ierr,shift = C->indexshift,rank = aij->rank,size = aij->size; 848 PetscTruth isdraw,isascii,flg,isbinary; 849 PetscViewer sviewer; 850 PetscViewerFormat format; 851 852 PetscFunctionBegin; 853 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 854 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 855 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 856 if (isascii) { 857 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 858 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 859 MatInfo info; 860 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 861 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 862 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 863 if (flg) { 864 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 865 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 866 } else { 867 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 868 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 869 } 870 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 871 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 872 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 873 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 874 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 875 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 876 PetscFunctionReturn(0); 877 } else if (format == PETSC_VIEWER_ASCII_INFO) { 878 PetscFunctionReturn(0); 879 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 880 #if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE) 881 ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr); 882 #endif 883 #if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE) 884 ierr = MatFactorInfo_Spooles(mat,viewer);CHKERRQ(ierr); 885 #endif 886 887 PetscFunctionReturn(0); 888 } 889 } else 
if (isbinary) { 890 if (size == 1) { 891 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 892 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 893 } else { 894 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 895 } 896 PetscFunctionReturn(0); 897 } else if (isdraw) { 898 PetscDraw draw; 899 PetscTruth isnull; 900 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 901 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 902 } 903 904 if (size == 1) { 905 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 906 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 907 } else { 908 /* assemble the entire matrix onto first processor. */ 909 Mat A; 910 Mat_SeqAIJ *Aloc; 911 int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 912 PetscScalar *a; 913 914 if (!rank) { 915 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 916 } else { 917 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 918 } 919 PetscLogObjectParent(mat,A); 920 921 /* copy over the A part */ 922 Aloc = (Mat_SeqAIJ*)aij->A->data; 923 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 924 row = aij->rstart; 925 for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;} 926 for (i=0; i<m; i++) { 927 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 928 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 929 } 930 aj = Aloc->j; 931 for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;} 932 933 /* copy over the B part */ 934 Aloc = (Mat_SeqAIJ*)aij->B->data; 935 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 936 row = aij->rstart; 937 ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr); 938 ct = cols; 939 for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];} 940 for (i=0; i<m; i++) { 941 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 942 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 943 } 944 ierr = PetscFree(ct);CHKERRQ(ierr); 945 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 946 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 947 /* 948 Everyone has to call to draw the matrix since the graphics waits are 949 synchronized across all processors that share the PetscDraw object 950 */ 951 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 952 if (!rank) { 953 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 954 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 955 } 956 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 957 ierr = MatDestroy(A);CHKERRQ(ierr); 958 } 959 PetscFunctionReturn(0); 960 } 961 962 #undef __FUNCT__ 963 #define __FUNCT__ "MatView_MPIAIJ" 964 int MatView_MPIAIJ(Mat mat,PetscViewer viewer) 965 { 966 int ierr; 967 PetscTruth isascii,isdraw,issocket,isbinary; 968 969 PetscFunctionBegin; 970 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 971 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 972 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 973 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 974 if (isascii || isdraw || isbinary || issocket) { 975 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 976 } else { 977 SETERRQ1(1,"Viewer type %s not supported by MPIAIJ 
matrices",((PetscObject)viewer)->type_name); 978 } 979 PetscFunctionReturn(0); 980 } 981 982 983 984 #undef __FUNCT__ 985 #define __FUNCT__ "MatRelax_MPIAIJ" 986 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 987 { 988 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 989 int ierr; 990 Vec bb1; 991 PetscScalar mone=-1.0; 992 993 PetscFunctionBegin; 994 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 995 996 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 997 998 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 999 if (flag & SOR_ZERO_INITIAL_GUESS) { 1000 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr); 1001 its--; 1002 } 1003 1004 while (its--) { 1005 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1006 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1007 1008 /* update rhs: bb1 = bb - B*x */ 1009 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1010 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1011 1012 /* local sweep */ 1013 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 1014 CHKERRQ(ierr); 1015 } 1016 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1017 if (flag & SOR_ZERO_INITIAL_GUESS) { 1018 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1019 its--; 1020 } 1021 while (its--) { 1022 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1023 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1024 1025 /* update rhs: bb1 = bb - B*x */ 1026 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1027 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1028 1029 /* local sweep */ 1030 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1031 CHKERRQ(ierr); 1032 } 1033 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1034 if (flag & SOR_ZERO_INITIAL_GUESS) { 1035 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 1036 its--; 1037 } 1038 while (its--) { 1039 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1040 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1041 1042 /* update rhs: bb1 = bb - B*x */ 1043 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 1044 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1045 1046 /* local sweep */ 1047 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 1048 CHKERRQ(ierr); 1049 } 1050 } else { 1051 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 1052 } 1053 1054 ierr = VecDestroy(bb1);CHKERRQ(ierr); 1055 PetscFunctionReturn(0); 1056 } 1057 1058 #undef __FUNCT__ 1059 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1060 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1061 { 1062 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1063 Mat A = mat->A,B = mat->B; 1064 int ierr; 1065 PetscReal isend[5],irecv[5]; 1066 1067 PetscFunctionBegin; 1068 info->block_size = 1.0; 1069 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1070 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1071 isend[3] = info->memory; isend[4] = 
info->mallocs; 1072 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1073 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1074 isend[3] += info->memory; isend[4] += info->mallocs; 1075 if (flag == MAT_LOCAL) { 1076 info->nz_used = isend[0]; 1077 info->nz_allocated = isend[1]; 1078 info->nz_unneeded = isend[2]; 1079 info->memory = isend[3]; 1080 info->mallocs = isend[4]; 1081 } else if (flag == MAT_GLOBAL_MAX) { 1082 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1083 info->nz_used = irecv[0]; 1084 info->nz_allocated = irecv[1]; 1085 info->nz_unneeded = irecv[2]; 1086 info->memory = irecv[3]; 1087 info->mallocs = irecv[4]; 1088 } else if (flag == MAT_GLOBAL_SUM) { 1089 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1090 info->nz_used = irecv[0]; 1091 info->nz_allocated = irecv[1]; 1092 info->nz_unneeded = irecv[2]; 1093 info->memory = irecv[3]; 1094 info->mallocs = irecv[4]; 1095 } 1096 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1097 info->fill_ratio_needed = 0; 1098 info->factor_mallocs = 0; 1099 info->rows_global = (double)matin->M; 1100 info->columns_global = (double)matin->N; 1101 info->rows_local = (double)matin->m; 1102 info->columns_local = (double)matin->N; 1103 1104 PetscFunctionReturn(0); 1105 } 1106 1107 #undef __FUNCT__ 1108 #define __FUNCT__ "MatSetOption_MPIAIJ" 1109 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1110 { 1111 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1112 int ierr; 1113 1114 PetscFunctionBegin; 1115 switch (op) { 1116 case MAT_NO_NEW_NONZERO_LOCATIONS: 1117 case MAT_YES_NEW_NONZERO_LOCATIONS: 1118 case MAT_COLUMNS_UNSORTED: 1119 case MAT_COLUMNS_SORTED: 1120 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1121 case MAT_KEEP_ZEROED_ROWS: 1122 case MAT_NEW_NONZERO_LOCATION_ERR: 1123 case MAT_USE_INODES: 1124 case MAT_DO_NOT_USE_INODES: 1125 case MAT_IGNORE_ZERO_ENTRIES: 1126 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1127 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1128 break; 1129 case MAT_ROW_ORIENTED: 1130 a->roworiented = PETSC_TRUE; 1131 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1132 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1133 break; 1134 case MAT_ROWS_SORTED: 1135 case MAT_ROWS_UNSORTED: 1136 case MAT_YES_NEW_DIAGONALS: 1137 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1138 break; 1139 case MAT_COLUMN_ORIENTED: 1140 a->roworiented = PETSC_FALSE; 1141 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1142 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1143 break; 1144 case MAT_IGNORE_OFF_PROC_ENTRIES: 1145 a->donotstash = PETSC_TRUE; 1146 break; 1147 case MAT_NO_NEW_DIAGONALS: 1148 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1149 default: 1150 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1151 } 1152 PetscFunctionReturn(0); 1153 } 1154 1155 #undef __FUNCT__ 1156 #define __FUNCT__ "MatGetRow_MPIAIJ" 1157 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1158 { 1159 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1160 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1161 int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1162 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1163 int *cmap,*idx_p; 1164 1165 PetscFunctionBegin; 1166 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1167 mat->getrowactive = PETSC_TRUE; 1168 1169 if (!mat->rowvalues && (idx || v)) { 1170 /* 1171 allocate enough space to hold information from the longest row. 
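       The workspace is reused by every subsequent call on this matrix.
       A caller-side sketch (hedged; mat and row are placeholders, and row
       must be owned by the calling process):

         int         ncols,*cols;
         PetscScalar *vals;
         ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
         ... use cols[0..ncols-1] and vals[0..ncols-1] ...
         ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);

       Only one row may be held at a time; MatRestoreRow() must be called
       before the next MatGetRow().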
1172 */ 1173 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1174 int max = 1,tmp; 1175 for (i=0; i<matin->m; i++) { 1176 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1177 if (max < tmp) { max = tmp; } 1178 } 1179 ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1180 mat->rowindices = (int*)(mat->rowvalues + max); 1181 } 1182 1183 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1184 lrow = row - rstart; 1185 1186 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1187 if (!v) {pvA = 0; pvB = 0;} 1188 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1189 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1190 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1191 nztot = nzA + nzB; 1192 1193 cmap = mat->garray; 1194 if (v || idx) { 1195 if (nztot) { 1196 /* Sort by increasing column numbers, assuming A and B already sorted */ 1197 int imark = -1; 1198 if (v) { 1199 *v = v_p = mat->rowvalues; 1200 for (i=0; i<nzB; i++) { 1201 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1202 else break; 1203 } 1204 imark = i; 1205 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1206 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1207 } 1208 if (idx) { 1209 *idx = idx_p = mat->rowindices; 1210 if (imark > -1) { 1211 for (i=0; i<imark; i++) { 1212 idx_p[i] = cmap[cworkB[i]]; 1213 } 1214 } else { 1215 for (i=0; i<nzB; i++) { 1216 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1217 else break; 1218 } 1219 imark = i; 1220 } 1221 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1222 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1223 } 1224 } else { 1225 if (idx) *idx = 0; 1226 if (v) *v = 0; 1227 } 1228 } 1229 *nz = nztot; 1230 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1231 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1232 PetscFunctionReturn(0); 1233 } 1234 1235 #undef __FUNCT__ 1236 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1237 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1238 { 1239 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1240 1241 PetscFunctionBegin; 1242 if (aij->getrowactive == PETSC_FALSE) { 1243 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1244 } 1245 aij->getrowactive = PETSC_FALSE; 1246 PetscFunctionReturn(0); 1247 } 1248 1249 #undef __FUNCT__ 1250 #define __FUNCT__ "MatNorm_MPIAIJ" 1251 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1252 { 1253 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1254 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1255 int ierr,i,j,cstart = aij->cstart,shift = amat->indexshift; 1256 PetscReal sum = 0.0; 1257 PetscScalar *v; 1258 1259 PetscFunctionBegin; 1260 if (aij->size == 1) { 1261 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1262 } else { 1263 if (type == NORM_FROBENIUS) { 1264 v = amat->a; 1265 for (i=0; i<amat->nz; i++) { 1266 #if defined(PETSC_USE_COMPLEX) 1267 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1268 #else 1269 sum += (*v)*(*v); v++; 1270 #endif 1271 } 1272 v = bmat->a; 1273 for (i=0; i<bmat->nz; i++) { 1274 #if defined(PETSC_USE_COMPLEX) 1275 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1276 #else 1277 sum += (*v)*(*v); v++; 1278 #endif 1279 } 1280 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1281 *norm = sqrt(*norm); 1282 } else if (type == NORM_1) { /* max column norm */ 1283 
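      /*
         The 1-norm is the maximum absolute column sum,

            norm_1(mat) = max_j sum_i |a_ij|.

         Each process accumulates |a_ij| into tmp[] indexed by GLOBAL column:
         cstart + local column for the diagonal block A, and garray[] for the
         off-diagonal block B. MPI_Allreduce then sums the per-process column
         sums, and the largest entry of the result is the norm.
      */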
PetscReal *tmp,*tmp2; 1284 int *jj,*garray = aij->garray; 1285 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1286 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1287 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1288 *norm = 0.0; 1289 v = amat->a; jj = amat->j; 1290 for (j=0; j<amat->nz; j++) { 1291 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1292 } 1293 v = bmat->a; jj = bmat->j; 1294 for (j=0; j<bmat->nz; j++) { 1295 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1296 } 1297 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1298 for (j=0; j<mat->N; j++) { 1299 if (tmp2[j] > *norm) *norm = tmp2[j]; 1300 } 1301 ierr = PetscFree(tmp);CHKERRQ(ierr); 1302 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1303 } else if (type == NORM_INFINITY) { /* max row norm */ 1304 PetscReal ntemp = 0.0; 1305 for (j=0; j<aij->A->m; j++) { 1306 v = amat->a + amat->i[j] + shift; 1307 sum = 0.0; 1308 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1309 sum += PetscAbsScalar(*v); v++; 1310 } 1311 v = bmat->a + bmat->i[j] + shift; 1312 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1313 sum += PetscAbsScalar(*v); v++; 1314 } 1315 if (sum > ntemp) ntemp = sum; 1316 } 1317 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1318 } else { 1319 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1320 } 1321 } 1322 PetscFunctionReturn(0); 1323 } 1324 1325 #undef __FUNCT__ 1326 #define __FUNCT__ "MatTranspose_MPIAIJ" 1327 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1328 { 1329 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1330 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1331 int ierr,shift = Aloc->indexshift; 1332 int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1333 Mat B; 1334 PetscScalar *array; 1335 1336 PetscFunctionBegin; 1337 if (!matout && M != N) { 1338 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1339 } 1340 1341 ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1342 1343 /* copy over the A part */ 1344 Aloc = (Mat_SeqAIJ*)a->A->data; 1345 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1346 row = a->rstart; 1347 for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;} 1348 for (i=0; i<m; i++) { 1349 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1350 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1351 } 1352 aj = Aloc->j; 1353 for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;} 1354 1355 /* copy over the B part */ 1356 Aloc = (Mat_SeqAIJ*)a->B->data; 1357 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1358 row = a->rstart; 1359 ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr); 1360 ct = cols; 1361 for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];} 1362 for (i=0; i<m; i++) { 1363 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1364 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1365 } 1366 ierr = PetscFree(ct);CHKERRQ(ierr); 1367 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1368 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1369 if (matout) { 1370 *matout = B; 1371 } else { 1372 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1373 } 1374 PetscFunctionReturn(0); 1375 } 1376 1377 #undef __FUNCT__ 1378 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1379 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1380 { 1381 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1382 Mat a = aij->A,b 
= aij->B; 1383 int ierr,s1,s2,s3; 1384 1385 PetscFunctionBegin; 1386 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1387 if (rr) { 1388 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1389 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1390 /* Overlap communication with computation. */ 1391 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1392 } 1393 if (ll) { 1394 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1395 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1396 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1397 } 1398 /* scale the diagonal block */ 1399 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1400 1401 if (rr) { 1402 /* Do a scatter end and then right scale the off-diagonal block */ 1403 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1404 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1405 } 1406 1407 PetscFunctionReturn(0); 1408 } 1409 1410 1411 #undef __FUNCT__ 1412 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1413 int MatPrintHelp_MPIAIJ(Mat A) 1414 { 1415 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1416 int ierr; 1417 1418 PetscFunctionBegin; 1419 if (!a->rank) { 1420 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1421 } 1422 PetscFunctionReturn(0); 1423 } 1424 1425 #undef __FUNCT__ 1426 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1427 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1428 { 1429 PetscFunctionBegin; 1430 *bs = 1; 1431 PetscFunctionReturn(0); 1432 } 1433 #undef __FUNCT__ 1434 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1435 int MatSetUnfactored_MPIAIJ(Mat A) 1436 { 1437 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1438 int ierr; 1439 1440 PetscFunctionBegin; 1441 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1442 PetscFunctionReturn(0); 1443 } 1444 1445 #undef __FUNCT__ 1446 #define __FUNCT__ "MatEqual_MPIAIJ" 1447 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1448 { 1449 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1450 Mat a,b,c,d; 1451 PetscTruth flg; 1452 int ierr; 1453 1454 PetscFunctionBegin; 1455 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1456 if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type"); 1457 a = matA->A; b = matA->B; 1458 c = matB->A; d = matB->B; 1459 1460 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1461 if (flg == PETSC_TRUE) { 1462 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1463 } 1464 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1465 PetscFunctionReturn(0); 1466 } 1467 1468 #undef __FUNCT__ 1469 #define __FUNCT__ "MatCopy_MPIAIJ" 1470 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1471 { 1472 int ierr; 1473 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1474 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1475 PetscTruth flg; 1476 1477 PetscFunctionBegin; 1478 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1479 if (str != SAME_NONZERO_PATTERN || !flg) { 1480 /* because of the column compression in the off-processor part of the matrix a->B, 1481 the number of columns in a->B and b->B may be different, hence we cannot call 1482 the MatCopy() directly on the two parts. 
If need be, we can provide a more 1483 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1484 then copying the submatrices */ 1485 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1486 } else { 1487 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1488 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1489 } 1490 PetscFunctionReturn(0); 1491 } 1492 1493 #undef __FUNCT__ 1494 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1495 int MatSetUpPreallocation_MPIAIJ(Mat A) 1496 { 1497 int ierr; 1498 1499 PetscFunctionBegin; 1500 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1501 PetscFunctionReturn(0); 1502 } 1503 1504 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1505 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int); 1506 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1507 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **); 1508 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *); 1509 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1510 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*); 1511 #endif 1512 1513 #include "petscblaslapack.h" 1514 1515 #undef __FUNCT__ 1516 #define __FUNCT__ "MatAXPY_MPIAIJ" 1517 int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str) 1518 { 1519 int ierr,one; 1520 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1521 Mat_SeqAIJ *x,*y; 1522 1523 PetscFunctionBegin; 1524 if (str == SAME_NONZERO_PATTERN) { 1525 x = (Mat_SeqAIJ *)xx->A->data; 1526 y = (Mat_SeqAIJ *)yy->A->data; 1527 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1528 x = (Mat_SeqAIJ *)xx->B->data; 1529 y = (Mat_SeqAIJ *)yy->B->data; 1530 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1531 } else { 1532 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1533 } 1534 PetscFunctionReturn(0); 1535 } 1536 1537 /* -------------------------------------------------------------------*/ 1538 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1539 MatGetRow_MPIAIJ, 1540 MatRestoreRow_MPIAIJ, 1541 MatMult_MPIAIJ, 1542 MatMultAdd_MPIAIJ, 1543 MatMultTranspose_MPIAIJ, 1544 MatMultTransposeAdd_MPIAIJ, 1545 0, 1546 0, 1547 0, 1548 0, 1549 0, 1550 0, 1551 MatRelax_MPIAIJ, 1552 MatTranspose_MPIAIJ, 1553 MatGetInfo_MPIAIJ, 1554 MatEqual_MPIAIJ, 1555 MatGetDiagonal_MPIAIJ, 1556 MatDiagonalScale_MPIAIJ, 1557 MatNorm_MPIAIJ, 1558 MatAssemblyBegin_MPIAIJ, 1559 MatAssemblyEnd_MPIAIJ, 1560 0, 1561 MatSetOption_MPIAIJ, 1562 MatZeroEntries_MPIAIJ, 1563 MatZeroRows_MPIAIJ, 1564 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1565 MatLUFactorSymbolic_MPIAIJ_TFS, 1566 #else 1567 0, 1568 #endif 1569 0, 1570 0, 1571 0, 1572 MatSetUpPreallocation_MPIAIJ, 1573 0, 1574 0, 1575 0, 1576 0, 1577 MatDuplicate_MPIAIJ, 1578 0, 1579 0, 1580 0, 1581 0, 1582 MatAXPY_MPIAIJ, 1583 MatGetSubMatrices_MPIAIJ, 1584 MatIncreaseOverlap_MPIAIJ, 1585 MatGetValues_MPIAIJ, 1586 MatCopy_MPIAIJ, 1587 MatPrintHelp_MPIAIJ, 1588 MatScale_MPIAIJ, 1589 0, 1590 0, 1591 0, 1592 MatGetBlockSize_MPIAIJ, 1593 0, 1594 0, 1595 0, 1596 0, 1597 MatFDColoringCreate_MPIAIJ, 1598 0, 1599 MatSetUnfactored_MPIAIJ, 1600 0, 1601 0, 1602 MatGetSubMatrix_MPIAIJ, 1603 MatDestroy_MPIAIJ, 1604 MatView_MPIAIJ, 1605 MatGetPetscMaps_Petsc, 1606 0, 1607 0, 1608 0, 1609 0, 1610 0, 1611 0, 1612 0, 1613 0, 1614 MatSetColoring_MPIAIJ, 1615 MatSetValuesAdic_MPIAIJ, 1616 MatSetValuesAdifor_MPIAIJ 1617 }; 1618 1619 /* 
----------------------------------------------------------------------------------------*/ 1620 1621 EXTERN_C_BEGIN 1622 #undef __FUNCT__ 1623 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1624 int MatStoreValues_MPIAIJ(Mat mat) 1625 { 1626 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1627 int ierr; 1628 1629 PetscFunctionBegin; 1630 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1631 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1632 PetscFunctionReturn(0); 1633 } 1634 EXTERN_C_END 1635 1636 EXTERN_C_BEGIN 1637 #undef __FUNCT__ 1638 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1639 int MatRetrieveValues_MPIAIJ(Mat mat) 1640 { 1641 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1642 int ierr; 1643 1644 PetscFunctionBegin; 1645 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1646 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1647 PetscFunctionReturn(0); 1648 } 1649 EXTERN_C_END 1650 1651 #include "petscpc.h" 1652 EXTERN_C_BEGIN 1653 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *); 1654 EXTERN_C_END 1655 1656 EXTERN_C_BEGIN 1657 #undef __FUNCT__ 1658 #define __FUNCT__ "MatCreate_MPIAIJ" 1659 int MatCreate_MPIAIJ(Mat B) 1660 { 1661 Mat_MPIAIJ *b; 1662 int ierr,i,size; 1663 1664 PetscFunctionBegin; 1665 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1666 1667 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1668 B->data = (void*)b; 1669 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1670 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1671 B->factor = 0; 1672 B->assembled = PETSC_FALSE; 1673 B->mapping = 0; 1674 1675 B->insertmode = NOT_SET_VALUES; 1676 b->size = size; 1677 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1678 1679 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1680 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1681 1682 /* the information in the maps duplicates the information computed below, eventually 1683 we should remove the duplicate information that is not contained in the maps */ 1684 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1685 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1686 1687 /* build local table of row and column ownerships */ 1688 ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1689 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1690 b->cowners = b->rowners + b->size + 2; 1691 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1692 b->rowners[0] = 0; 1693 for (i=2; i<=b->size; i++) { 1694 b->rowners[i] += b->rowners[i-1]; 1695 } 1696 b->rstart = b->rowners[b->rank]; 1697 b->rend = b->rowners[b->rank+1]; 1698 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1699 b->cowners[0] = 0; 1700 for (i=2; i<=b->size; i++) { 1701 b->cowners[i] += b->cowners[i-1]; 1702 } 1703 b->cstart = b->cowners[b->rank]; 1704 b->cend = b->cowners[b->rank+1]; 1705 1706 /* build cache for off array entries formed */ 1707 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1708 b->donotstash = PETSC_FALSE; 1709 b->colmap = 0; 1710 b->garray = 0; 1711 b->roworiented = PETSC_TRUE; 1712 1713 /* stuff used for matrix vector multiply */ 1714 b->lvec = PETSC_NULL; 1715 b->Mvctx = PETSC_NULL; 1716 1717 /* stuff for MatGetRow() */ 1718 b->rowindices = 0; 1719 b->rowvalues = 0; 1720 b->getrowactive = PETSC_FALSE; 1721 1722 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1723 
"MatStoreValues_MPIAIJ", 1724 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1725 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1726 "MatRetrieveValues_MPIAIJ", 1727 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1728 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1729 "MatGetDiagonalBlock_MPIAIJ", 1730 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1731 1732 PetscFunctionReturn(0); 1733 } 1734 EXTERN_C_END 1735 1736 #undef __FUNCT__ 1737 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1738 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1739 { 1740 Mat mat; 1741 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1742 int ierr; 1743 1744 PetscFunctionBegin; 1745 *newmat = 0; 1746 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1747 ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr); 1748 a = (Mat_MPIAIJ*)mat->data; 1749 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1750 mat->factor = matin->factor; 1751 mat->assembled = PETSC_TRUE; 1752 mat->insertmode = NOT_SET_VALUES; 1753 mat->preallocated = PETSC_TRUE; 1754 1755 a->rstart = oldmat->rstart; 1756 a->rend = oldmat->rend; 1757 a->cstart = oldmat->cstart; 1758 a->cend = oldmat->cend; 1759 a->size = oldmat->size; 1760 a->rank = oldmat->rank; 1761 a->donotstash = oldmat->donotstash; 1762 a->roworiented = oldmat->roworiented; 1763 a->rowindices = 0; 1764 a->rowvalues = 0; 1765 a->getrowactive = PETSC_FALSE; 1766 1767 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1768 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1769 if (oldmat->colmap) { 1770 #if defined (PETSC_USE_CTABLE) 1771 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1772 #else 1773 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1774 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1775 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1776 #endif 1777 } else a->colmap = 0; 1778 if (oldmat->garray) { 1779 int len; 1780 len = oldmat->B->n; 1781 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1782 PetscLogObjectMemory(mat,len*sizeof(int)); 1783 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1784 } else a->garray = 0; 1785 1786 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1787 PetscLogObjectParent(mat,a->lvec); 1788 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1789 PetscLogObjectParent(mat,a->Mvctx); 1790 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1791 PetscLogObjectParent(mat,a->A); 1792 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1793 PetscLogObjectParent(mat,a->B); 1794 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1795 *newmat = mat; 1796 PetscFunctionReturn(0); 1797 } 1798 1799 #include "petscsys.h" 1800 1801 EXTERN_C_BEGIN 1802 #undef __FUNCT__ 1803 #define __FUNCT__ "MatLoad_MPIAIJ" 1804 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat) 1805 { 1806 Mat A; 1807 PetscScalar *vals,*svals; 1808 MPI_Comm comm = ((PetscObject)viewer)->comm; 1809 MPI_Status status; 1810 int i,nz,ierr,j,rstart,rend,fd; 1811 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1812 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1813 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1814 1815 PetscFunctionBegin; 1816 ierr = 
MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1817 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1818 if (!rank) { 1819 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1820 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1821 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1822 if (header[3] < 0) { 1823 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 1824 } 1825 } 1826 1827 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1828 M = header[1]; N = header[2]; 1829 /* determine ownership of all rows */ 1830 m = M/size + ((M % size) > rank); 1831 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1832 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1833 rowners[0] = 0; 1834 for (i=2; i<=size; i++) { 1835 rowners[i] += rowners[i-1]; 1836 } 1837 rstart = rowners[rank]; 1838 rend = rowners[rank+1]; 1839 1840 /* distribute row lengths to all processors */ 1841 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr); 1842 offlens = ourlens + (rend-rstart); 1843 if (!rank) { 1844 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 1845 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1846 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 1847 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1848 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1849 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1850 } else { 1851 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1852 } 1853 1854 if (!rank) { 1855 /* calculate the number of nonzeros on each processor */ 1856 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 1857 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 1858 for (i=0; i<size; i++) { 1859 for (j=rowners[i]; j< rowners[i+1]; j++) { 1860 procsnz[i] += rowlengths[j]; 1861 } 1862 } 1863 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1864 1865 /* determine max buffer needed and allocate it */ 1866 maxnz = 0; 1867 for (i=0; i<size; i++) { 1868 maxnz = PetscMax(maxnz,procsnz[i]); 1869 } 1870 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 1871 1872 /* read in my part of the matrix column indices */ 1873 nz = procsnz[0]; 1874 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 1875 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1876 1877 /* read in every one elses and ship off */ 1878 for (i=1; i<size; i++) { 1879 nz = procsnz[i]; 1880 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1881 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 1882 } 1883 ierr = PetscFree(cols);CHKERRQ(ierr); 1884 } else { 1885 /* determine buffer space needed for message */ 1886 nz = 0; 1887 for (i=0; i<m; i++) { 1888 nz += ourlens[i]; 1889 } 1890 ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr); 1891 1892 /* receive message of column indices*/ 1893 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 1894 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 1895 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1896 } 1897 1898 /* determine column ownership if matrix is not square */ 1899 if (N != M) { 1900 n = N/size + ((N % size) > rank); 1901 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1902 cstart = cend - n; 1903 } else { 1904 cstart 
= rstart; 1905 cend = rend; 1906 n = cend - cstart; 1907 } 1908 1909 /* loop over local rows, determining number of off diagonal entries */ 1910 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 1911 jj = 0; 1912 for (i=0; i<m; i++) { 1913 for (j=0; j<ourlens[i]; j++) { 1914 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 1915 jj++; 1916 } 1917 } 1918 1919 /* create our matrix */ 1920 for (i=0; i<m; i++) { 1921 ourlens[i] -= offlens[i]; 1922 } 1923 ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1924 A = *newmat; 1925 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 1926 for (i=0; i<m; i++) { 1927 ourlens[i] += offlens[i]; 1928 } 1929 1930 if (!rank) { 1931 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1932 1933 /* read in my part of the matrix numerical values */ 1934 nz = procsnz[0]; 1935 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1936 1937 /* insert into matrix */ 1938 jj = rstart; 1939 smycols = mycols; 1940 svals = vals; 1941 for (i=0; i<m; i++) { 1942 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1943 smycols += ourlens[i]; 1944 svals += ourlens[i]; 1945 jj++; 1946 } 1947 1948 /* read in other processors and ship out */ 1949 for (i=1; i<size; i++) { 1950 nz = procsnz[i]; 1951 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1952 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 1953 } 1954 ierr = PetscFree(procsnz);CHKERRQ(ierr); 1955 } else { 1956 /* receive numeric values */ 1957 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1958 1959 /* receive message of values*/ 1960 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 1961 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1962 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1963 1964 /* insert into matrix */ 1965 jj = rstart; 1966 smycols = mycols; 1967 svals = vals; 1968 for (i=0; i<m; i++) { 1969 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1970 smycols += ourlens[i]; 1971 svals += ourlens[i]; 1972 jj++; 1973 } 1974 } 1975 ierr = PetscFree(ourlens);CHKERRQ(ierr); 1976 ierr = PetscFree(vals);CHKERRQ(ierr); 1977 ierr = PetscFree(mycols);CHKERRQ(ierr); 1978 ierr = PetscFree(rowners);CHKERRQ(ierr); 1979 1980 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1981 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1982 PetscFunctionReturn(0); 1983 } 1984 EXTERN_C_END 1985 1986 #undef __FUNCT__ 1987 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 1988 /* 1989 Not great since it makes two copies of the submatrix, first an SeqAIJ 1990 in local and then by concatenating the local matrices the end result. 
1991 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 1992 */ 1993 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 1994 { 1995 int ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j; 1996 int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 1997 Mat *local,M,Mreuse; 1998 PetscScalar *vwork,*aa; 1999 MPI_Comm comm = mat->comm; 2000 Mat_SeqAIJ *aij; 2001 2002 2003 PetscFunctionBegin; 2004 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2005 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2006 2007 if (call == MAT_REUSE_MATRIX) { 2008 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2009 if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse"); 2010 local = &Mreuse; 2011 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2012 } else { 2013 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2014 Mreuse = *local; 2015 ierr = PetscFree(local);CHKERRQ(ierr); 2016 } 2017 2018 /* 2019 m - number of local rows 2020 n - number of columns (same on all processors) 2021 rstart - first row in new global matrix generated 2022 */ 2023 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2024 if (call == MAT_INITIAL_MATRIX) { 2025 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2026 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix"); 2027 ii = aij->i; 2028 jj = aij->j; 2029 2030 /* 2031 Determine the number of non-zeros in the diagonal and off-diagonal 2032 portions of the matrix in order to do correct preallocation 2033 */ 2034 2035 /* first get start and end of "diagonal" columns */ 2036 if (csize == PETSC_DECIDE) { 2037 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 2038 if (mglobal == n) { /* square matrix */ 2039 nlocal = m; 2040 } else { 2041 nlocal = n/size + ((n % size) > rank); 2042 } 2043 } else { 2044 nlocal = csize; 2045 } 2046 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2047 rstart = rend - nlocal; 2048 if (rank == size - 1 && rend != n) { 2049 SETERRQ(1,"Local column sizes do not add up to total number of columns"); 2050 } 2051 2052 /* next, compute all the lengths */ 2053 ierr = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr); 2054 olens = dlens + m; 2055 for (i=0; i<m; i++) { 2056 jend = ii[i+1] - ii[i]; 2057 olen = 0; 2058 dlen = 0; 2059 for (j=0; j<jend; j++) { 2060 if (*jj < rstart || *jj >= rend) olen++; 2061 else dlen++; 2062 jj++; 2063 } 2064 olens[i] = olen; 2065 dlens[i] = dlen; 2066 } 2067 ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 2068 ierr = PetscFree(dlens);CHKERRQ(ierr); 2069 } else { 2070 int ml,nl; 2071 2072 M = *newmat; 2073 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2074 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 2075 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2076 /* 2077 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2078 rather than the slower MatSetValues(). 
2079 */ 2080 M->was_assembled = PETSC_TRUE; 2081 M->assembled = PETSC_FALSE; 2082 } 2083 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2084 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2085 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix"); 2086 ii = aij->i; 2087 jj = aij->j; 2088 aa = aij->a; 2089 for (i=0; i<m; i++) { 2090 row = rstart + i; 2091 nz = ii[i+1] - ii[i]; 2092 cwork = jj; jj += nz; 2093 vwork = aa; aa += nz; 2094 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2095 } 2096 2097 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2098 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2099 *newmat = M; 2100 2101 /* save submatrix used in processor for next request */ 2102 if (call == MAT_INITIAL_MATRIX) { 2103 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2104 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2105 } 2106 2107 PetscFunctionReturn(0); 2108 } 2109 2110 #undef __FUNCT__ 2111 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2112 /*@C 2113 MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format 2114 (the default parallel PETSc format). For good matrix assembly performance 2115 the user should preallocate the matrix storage by setting the parameters 2116 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2117 performance can be increased by more than a factor of 50. 2118 2119 Collective on MPI_Comm 2120 2121 Input Parameters: 2122 + A - the matrix 2123 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2124 (same value is used for all local rows) 2125 . d_nnz - array containing the number of nonzeros in the various rows of the 2126 DIAGONAL portion of the local submatrix (possibly different for each row) 2127 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2128 The size of this array is equal to the number of local rows, i.e 'm'. 2129 You must leave room for the diagonal entry even if it is zero. 2130 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2131 submatrix (same value is used for all local rows). 2132 - o_nnz - array containing the number of nonzeros in the various rows of the 2133 OFF-DIAGONAL portion of the local submatrix (possibly different for 2134 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2135 structure. The size of this array is equal to the number 2136 of local rows, i.e 'm'. 2137 2138 The AIJ format (also called the Yale sparse matrix format or 2139 compressed row storage), is fully compatible with standard Fortran 77 2140 storage. That is, the stored row and column indices can begin at 2141 either one (as in Fortran) or zero. See the users manual for details. 2142 2143 The user MUST specify either the local or global matrix dimensions 2144 (possibly both). 2145 2146 The parallel matrix is partitioned such that the first m0 rows belong to 2147 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2148 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2149 2150 The DIAGONAL portion of the local submatrix of a processor can be defined 2151 as the submatrix which is obtained by extraction the part corresponding 2152 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2153 first row that belongs to the processor, and r2 is the last row belonging 2154 to the this processor. This is a square mxm matrix. 
The remaining portion 2155 of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion. 2156 2157 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2158 2159 By default, this format uses inodes (identical nodes) when possible. 2160 We search for consecutive rows with the same nonzero structure, thereby 2161 reusing matrix information to achieve increased efficiency. 2162 2163 Options Database Keys: 2164 + -mat_aij_no_inode - Do not use inodes 2165 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2166 - -mat_aij_oneindex - Internally use indexing starting at 1 2167 rather than 0. Note that when calling MatSetValues(), 2168 the user still MUST index entries starting at 0! 2169 2170 Example usage: 2171 2172 Consider the following 8x8 matrix with 34 non-zero values, that is 2173 assembled across 3 processors. Let us assume that proc0 owns 3 rows, 2174 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 2175 as follows: 2176 2177 .vb 2178 1 2 0 | 0 3 0 | 0 4 2179 Proc0 0 5 6 | 7 0 0 | 8 0 2180 9 0 10 | 11 0 0 | 12 0 2181 ------------------------------------- 2182 13 0 14 | 15 16 17 | 0 0 2183 Proc1 0 18 0 | 19 20 21 | 0 0 2184 0 0 0 | 22 23 0 | 24 0 2185 ------------------------------------- 2186 Proc2 25 26 27 | 0 0 28 | 29 0 2187 30 0 0 | 31 32 33 | 0 34 2188 .ve 2189 2190 This can be represented as a collection of submatrices as: 2191 2192 .vb 2193 A B C 2194 D E F 2195 G H I 2196 .ve 2197 2198 Here the submatrices A,B,C are owned by proc0, D,E,F are 2199 owned by proc1, and G,H,I are owned by proc2. 2200 2201 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2202 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2203 The 'M','N' parameters are 8,8, and have the same values on all procs. 2204 2205 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2206 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2207 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2208 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 2209 part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ 2210 matrix, and [DF] as another SeqAIJ matrix. 2211 2212 When the d_nz, o_nz parameters are specified, d_nz storage elements are 2213 allocated for every row of the local diagonal submatrix, and o_nz 2214 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 2215 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 2216 local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively. 2217 In this case, the values of d_nz,o_nz are: 2218 .vb 2219 proc0 : d_nz = 2, o_nz = 2 2220 proc1 : d_nz = 3, o_nz = 2 2221 proc2 : d_nz = 1, o_nz = 4 2222 .ve 2223 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2224 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 2225 for proc2, i.e. we are using 12+15+10=37 storage locations to store 2226 34 values. 2227 2228 When the d_nnz, o_nnz parameters are specified, the storage is specified 2229 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 2230 In the above case the values for d_nnz,o_nnz are: 2231 .vb 2232 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2233 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2234 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2235 .ve 2236 Here the space allocated is the sum of all the above values, i.e. 34, and 2237 hence the preallocation is perfect.
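
   A rough sketch of the calling sequence is given below. It is illustrative only
   and not part of the library source; the local sizes m,n and the per-row arrays
   d_nnz,o_nnz are assumed to have been computed by the caller.

.vb
   Mat A;
   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
   ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
   /* ... MatSetValues() calls to fill the matrix ... */
   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve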
2238 2239 Level: intermediate 2240 2241 .keywords: matrix, aij, compressed row, sparse, parallel 2242 2243 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2244 @*/ 2245 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 2246 { 2247 Mat_MPIAIJ *b; 2248 int ierr,i; 2249 PetscTruth flg2; 2250 2251 PetscFunctionBegin; 2252 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr); 2253 if (!flg2) PetscFunctionReturn(0); 2254 B->preallocated = PETSC_TRUE; 2255 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2256 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2257 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 2258 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 2259 if (d_nnz) { 2260 for (i=0; i<B->m; i++) { 2261 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 2262 } 2263 } 2264 if (o_nnz) { 2265 for (i=0; i<B->m; i++) { 2266 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 2267 } 2268 } 2269 b = (Mat_MPIAIJ*)B->data; 2270 2271 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2272 PetscLogObjectParent(B,b->A); 2273 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2274 PetscLogObjectParent(B,b->B); 2275 2276 PetscFunctionReturn(0); 2277 } 2278 2279 #undef __FUNCT__ 2280 #define __FUNCT__ "MatCreateMPIAIJ" 2281 /*@C 2282 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2283 (the default parallel PETSc format). For good matrix assembly performance 2284 the user should preallocate the matrix storage by setting the parameters 2285 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2286 performance can be increased by more than a factor of 50. 2287 2288 Collective on MPI_Comm 2289 2290 Input Parameters: 2291 + comm - MPI communicator 2292 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2293 This value should be the same as the local size used in creating the 2294 y vector for the matrix-vector product y = Ax. 2295 . n - This value should be the same as the local size used in creating the 2296 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2297 calculated if N is given) For square matrices n is almost always m. 2298 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2299 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2300 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2301 (same value is used for all local rows) 2302 . d_nnz - array containing the number of nonzeros in the various rows of the 2303 DIAGONAL portion of the local submatrix (possibly different for each row) 2304 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2305 The size of this array is equal to the number of local rows, i.e 'm'. 2306 You must leave room for the diagonal entry even if it is zero. 2307 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2308 submatrix (same value is used for all local rows). 2309 - o_nnz - array containing the number of nonzeros in the various rows of the 2310 OFF-DIAGONAL portion of the local submatrix (possibly different for 2311 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2312 structure. 
The size of this array is equal to the number 2313 of local rows, i.e 'm'. 2314 2315 Output Parameter: 2316 . A - the matrix 2317 2318 Notes: 2319 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2320 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2321 storage requirements for this matrix. 2322 2323 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2324 processor than it must be used on all processors that share the object for 2325 that argument. 2326 2327 The AIJ format (also called the Yale sparse matrix format or 2328 compressed row storage), is fully compatible with standard Fortran 77 2329 storage. That is, the stored row and column indices can begin at 2330 either one (as in Fortran) or zero. See the users manual for details. 2331 2332 The user MUST specify either the local or global matrix dimensions 2333 (possibly both). 2334 2335 The parallel matrix is partitioned such that the first m0 rows belong to 2336 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2337 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2338 2339 The DIAGONAL portion of the local submatrix of a processor can be defined 2340 as the submatrix which is obtained by extraction the part corresponding 2341 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2342 first row that belongs to the processor, and r2 is the last row belonging 2343 to the this processor. This is a square mxm matrix. The remaining portion 2344 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2345 2346 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2347 2348 By default, this format uses inodes (identical nodes) when possible. 2349 We search for consecutive rows with the same nonzero structure, thereby 2350 reusing matrix information to achieve increased efficiency. 2351 2352 Options Database Keys: 2353 + -mat_aij_no_inode - Do not use inodes 2354 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2355 - -mat_aij_oneindex - Internally use indexing starting at 1 2356 rather than 0. Note that when calling MatSetValues(), 2357 the user still MUST index entries starting at 0! 2358 2359 2360 Example usage: 2361 2362 Consider the following 8x8 matrix with 34 non-zero values, that is 2363 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2364 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2365 as follows: 2366 2367 .vb 2368 1 2 0 | 0 3 0 | 0 4 2369 Proc0 0 5 6 | 7 0 0 | 8 0 2370 9 0 10 | 11 0 0 | 12 0 2371 ------------------------------------- 2372 13 0 14 | 15 16 17 | 0 0 2373 Proc1 0 18 0 | 19 20 21 | 0 0 2374 0 0 0 | 22 23 0 | 24 0 2375 ------------------------------------- 2376 Proc2 25 26 27 | 0 0 28 | 29 0 2377 30 0 0 | 31 32 33 | 0 34 2378 .ve 2379 2380 This can be represented as a collection of submatrices as: 2381 2382 .vb 2383 A B C 2384 D E F 2385 G H I 2386 .ve 2387 2388 Where the submatrices A,B,C are owned by proc0, D,E,F are 2389 owned by proc1, G,H,I are owned by proc2. 2390 2391 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2392 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2393 The 'M','N' parameters are 8,8, and have the same values on all procs. 2394 2395 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2396 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2397 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
2398 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2399 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2400 matrix, ans [DF] as another SeqAIJ matrix. 2401 2402 When d_nz, o_nz parameters are specified, d_nz storage elements are 2403 allocated for every row of the local diagonal submatrix, and o_nz 2404 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2405 One way to choose d_nz and o_nz is to use the max nonzerors per local 2406 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2407 In this case, the values of d_nz,o_nz are: 2408 .vb 2409 proc0 : dnz = 2, o_nz = 2 2410 proc1 : dnz = 3, o_nz = 2 2411 proc2 : dnz = 1, o_nz = 4 2412 .ve 2413 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2414 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2415 for proc3. i.e we are using 12+15+10=37 storage locations to store 2416 34 values. 2417 2418 When d_nnz, o_nnz parameters are specified, the storage is specified 2419 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2420 In the above case the values for d_nnz,o_nnz are: 2421 .vb 2422 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2423 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2424 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2425 .ve 2426 Here the space allocated is sum of all the above values i.e 34, and 2427 hence pre-allocation is perfect. 2428 2429 Level: intermediate 2430 2431 .keywords: matrix, aij, compressed row, sparse, parallel 2432 2433 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2434 @*/ 2435 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 2436 { 2437 int ierr,size; 2438 2439 PetscFunctionBegin; 2440 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2441 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2442 if (size > 1) { 2443 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2444 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2445 } else { 2446 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2447 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2448 } 2449 PetscFunctionReturn(0); 2450 } 2451 2452 #undef __FUNCT__ 2453 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2454 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap) 2455 { 2456 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2457 PetscFunctionBegin; 2458 *Ad = a->A; 2459 *Ao = a->B; 2460 *colmap = a->garray; 2461 PetscFunctionReturn(0); 2462 } 2463 2464 #undef __FUNCT__ 2465 #define __FUNCT__ "MatSetColoring_MPIAIJ" 2466 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 2467 { 2468 int ierr,i; 2469 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2470 2471 PetscFunctionBegin; 2472 if (coloring->ctype == IS_COLORING_LOCAL) { 2473 ISColoringValue *allcolors,*colors; 2474 ISColoring ocoloring; 2475 2476 /* set coloring for diagonal portion */ 2477 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 2478 2479 /* set coloring for off-diagonal portion */ 2480 ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 2481 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2482 for (i=0; i<a->B->n; i++) { 2483 colors[i] = allcolors[a->garray[i]]; 2484 } 2485 ierr = PetscFree(allcolors);CHKERRQ(ierr); 2486 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2487 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2488 ierr = 
ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2489 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2490 ISColoringValue *colors; 2491 int *larray; 2492 ISColoring ocoloring; 2493 2494 /* set coloring for diagonal portion */ 2495 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2496 for (i=0; i<a->A->n; i++) { 2497 larray[i] = i + a->cstart; 2498 } 2499 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2500 ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2501 for (i=0; i<a->A->n; i++) { 2502 colors[i] = coloring->colors[larray[i]]; 2503 } 2504 ierr = PetscFree(larray);CHKERRQ(ierr); 2505 ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2506 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2507 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2508 2509 /* set coloring for off-diagonal portion */ 2510 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2511 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2512 ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 2513 for (i=0; i<a->B->n; i++) { 2514 colors[i] = coloring->colors[larray[i]]; 2515 } 2516 ierr = PetscFree(larray);CHKERRQ(ierr); 2517 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2518 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2519 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2520 } else { 2521 SETERRQ1(1,"No support ISColoringType %d",coloring->ctype); 2522 } 2523 2524 PetscFunctionReturn(0); 2525 } 2526 2527 #undef __FUNCT__ 2528 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2529 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2530 { 2531 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2532 int ierr; 2533 2534 PetscFunctionBegin; 2535 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2536 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2537 PetscFunctionReturn(0); 2538 } 2539 2540 #undef __FUNCT__ 2541 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2542 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues) 2543 { 2544 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2545 int ierr; 2546 2547 PetscFunctionBegin; 2548 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2549 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2550 PetscFunctionReturn(0); 2551 } 2552 2553 #undef __FUNCT__ 2554 #define __FUNCT__ "MatMerge" 2555 /*@C 2556 MatMerge - Creates a single large PETSc matrix by concatinating sequential 2557 matrices from each processor 2558 2559 Collective on MPI_Comm 2560 2561 Input Parameters: 2562 + comm - the communicators the parallel matrix will live on 2563 - inmat - the input sequential matrices 2564 2565 Output Parameter: 2566 . outmat - the parallel matrix generated 2567 2568 Level: advanced 2569 2570 Notes: The number of columns of the matrix in EACH of the seperate files 2571 MUST be the same. 
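
   As implemented below, the input matrix inmat is destroyed once the parallel
   matrix has been assembled, so the caller must not use or destroy it afterwards.

   A minimal calling sketch (localA is a placeholder name for the sequential
   matrix built on each process; this example is not part of the library source):

.vb
   Mat parallelA;
   ierr = MatMerge(comm,localA,&parallelA);CHKERRQ(ierr);
.ve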
2572 2573 @*/ 2574 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat) 2575 { 2576 int ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz; 2577 PetscScalar *values; 2578 PetscMap columnmap,rowmap; 2579 2580 PetscFunctionBegin; 2581 2582 ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr); 2583 2584 /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */ 2585 ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr); 2586 ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr); 2587 ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr); 2588 ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr); 2589 ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr); 2590 2591 ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr); 2592 ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr); 2593 ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr); 2594 ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr); 2595 ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr); 2596 2597 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 2598 for (i=0;i<m;i++) { 2599 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2600 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 2601 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2602 } 2603 ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr); 2604 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 2605 2606 for (i=0;i<m;i++) { 2607 ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2608 I = i + rstart; 2609 ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2610 ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 2611 } 2612 ierr = MatDestroy(inmat);CHKERRQ(ierr); 2613 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2614 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2615 2616 PetscFunctionReturn(0); 2617 } 2618 2619 #undef __FUNCT__ 2620 #define __FUNCT__ "MatFileSplit" 2621 int MatFileSplit(Mat A,char *outfile) 2622 { 2623 int ierr,rank,len,m,N,i,rstart,*indx,nnz; 2624 PetscViewer out; 2625 char *name; 2626 Mat B; 2627 PetscScalar *values; 2628 2629 PetscFunctionBegin; 2630 2631 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 2632 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 2633 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr); 2634 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 2635 for (i=0;i<m;i++) { 2636 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2637 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 2638 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 2639 } 2640 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2641 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2642 2643 ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr); 2644 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 2645 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 2646 sprintf(name,"%s.%d",outfile,rank); 2647 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr); 2648 ierr = PetscFree(name); 2649 ierr = MatView(B,out);CHKERRQ(ierr); 2650 ierr = PetscViewerDestroy(out);CHKERRQ(ierr); 2651 ierr = MatDestroy(B);CHKERRQ(ierr); 2652 PetscFunctionReturn(0); 2653 } 2654
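
/*
   Usage sketch (illustrative only, not part of the library): given an assembled
   MPIAIJ matrix A, the local diagonal block, the local off-diagonal block, and the
   array mapping the compressed local columns of the off-diagonal block to global
   column numbers can be obtained with MatMPIAIJGetSeqAIJ() defined above, e.g.

     Mat Ad,Ao;
     int *garray;
     ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
*/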