/*$Id: mpiaij.c,v 1.341 2001/08/06 21:15:22 bsmith Exp balay $*/

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
EXTERN int DisAssemble_MPIAIJ(Mat);
EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatPrintHelp_SeqAIJ(Mat);

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. When PETSC_USE_CTABLE is defined this is scalable
   at a slightly higher hash table cost; without it it is not scalable
   (each processor has an order-N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        n = aij->B->n,i,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col1) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int         new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col1) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int         new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
      for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(b->a);CHKERRQ(ierr); \
      if (!b->singlemalloc) { \
        ierr = PetscFree(b->i);CHKERRQ(ierr); \
        ierr = PetscFree(b->j);CHKERRQ(ierr); \
      } \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = PETSC_TRUE; \
 \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow; \
}
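
/*
   How MatSetValues_MPIAIJ (below) routes an entry (im[i],in[j]):
     - row owned locally, column inside [cstart,cend): inlined insertion into
       the "diagonal" block aij->A via MatSetValues_SeqAIJ_A_Private();
     - row owned locally, column off-processor: the global column is translated
       to a local column of aij->B through colmap/garray and inserted with
       MatSetValues_SeqAIJ_B_Private();
     - row not owned: the entry is cached in mat->stash and communicated during
       MatAssemblyBegin()/MatAssemblyEnd().
   A sketched user-level call that can exercise all three paths (grow and gcol
   here are hypothetical global indices, not names used elsewhere in this file):

     ierr = MatSetValues(mat,1,&grow,1,&gcol,&val,ADD_VALUES);CHKERRQ(ierr);

   MatSetValues() dispatches to the routine below for the MATMPIAIJ type.
*/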
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar value;
  int         ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int         cstart = aij->cstart,cend = aij->cend,row,col;
  PetscTruth  roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A = aij->A;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)A->data;
  int         *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar *aa = a->a;
  PetscTruth  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
  Mat         B = aij->B;
  Mat_SeqAIJ  *b = (Mat_SeqAIJ*)B->data;
  int         *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
  PetscScalar *ba = b->a;

  int         *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
  int         nonew = a->nonew,shift = a->indexshift;
  PetscScalar *ap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int        cstart = aij->cstart,cend = aij->cend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr,nstash,reallocs;
  InsertMode addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  int         i,j,rstart,ncols,n,ierr,flg;
  int         *row,*col,other_disassembled;
  PetscScalar *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of the submatrix B cannot change then we know
     that no processor disassembled, thus we can skip this step
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
{
  Mat_MPIAIJ  *l = (Mat_MPIAIJ*)A->data;
  int         i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int         *procs,*nprocs,j,idx,nsends,*work,row;
  int         nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int         *rvalues,tag = A->tag,count,base,slen,n,*source;
  int         *lens,imdex,*lrows,*values,rstart=l->rstart;
  MPI_Comm    comm = A->comm;
  MPI_Request *send_waits,*recv_waits;
  MPI_Status  recv_status,*send_status;
  IS          istmp;
  PetscTruth  found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs = nprocs + size;
  ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  ierr   = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
  ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nrecvs = work[size+rank];
  nmax   = work[rank];
  ierr   = PetscFree(work);CHKERRQ(ierr);
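
  /*
     Note: the PetscMaxSum_Op reduction above combines the 2*size arrays so
     that (roughly) work[i] holds the maximum of the per-processor counts
     destined for processor i and work[size+i] holds the sum of the 0/1
     "I will send to i" flags; hence nmax bounds the length of any incoming
     message and nrecvs is the number of messages this processor receives.
  */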
  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for (i=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory, triggering lots of mallocs and
     slowing things down.

     Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in yy until the next line, which is true for this implementation
     but is perhaps not always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
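
/*
   Note on the multiply kernels above and below: with the row distribution
   used here, y = A x is computed as
       y_local = A_diag x_local + B_offdiag x_ghost
   where the ghost values x_ghost (stored in a->lvec) are filled by the
   a->Mvctx scatter. The scatter is begun before the diagonal-block product
   so that communication overlaps local computation; the transpose kernels
   run the same scatter in reverse with ADD_VALUES to accumulate the remote
   contributions.
*/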
#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in zz until the next line, which is true for this implementation
     but is perhaps not always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
int MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ        *C = (Mat_SeqAIJ*)aij->A->data;
  int               ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,isascii,flg;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
      if (flg) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
                                                  rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
                                                  rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    if (!rank) {
      ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
    ct = cols;
    for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (isascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

/*
  This has to provide several versions.

    2) a) use only local smoothing, updating outer values only once
       b) local smoothing, updating outer values each inner iteration
    3) color, updating outer values between colors.
*/
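
/*
   A sketch of the local relaxation performed in MatRelax_MPIAIJ below: for a
   local row i the two SPARSEDENSEMDOT() calls subtract the sparse dot
   products from sum, giving
       sum = b_i - sum_j A_ij x_j - sum_k B_ik xghost_k
   and the update is then
       x_i <- (1 - omega) x_i + omega (sum + A_ii x_i) / (A_ii + fshift)
   i.e. standard SOR on the locally owned rows, with the ghost values
   (scattered into mat->lvec before the sweeps) held fixed.
*/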
#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,Vec xx)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ*)matin->data;
  Mat         AA = mat->A,BB = mat->B;
  Mat_SeqAIJ  *A = (Mat_SeqAIJ*)AA->data,*B = (Mat_SeqAIJ*)BB->data;
  PetscScalar *b,*x,*xs,*ls,d,*v,sum;
  int         ierr,*idx,*diag;
  int         n = matin->n,m = matin->m,i,shift = A->indexshift;

  PetscFunctionBegin;
  if (!A->diag) {ierr = MatMarkDiagonal_SeqAIJ(AA);CHKERRQ(ierr);}
  diag = A->diag;
  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    if (xx != bb) {
      ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
    } else {
      b = x;
    }
    ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
    xs = x + shift; /* shift by one for index start of 1 */
    ls = ls + shift;
    while (its--) {
      /* go down through the rows */
      for (i=0; i<m; i++) {
        n    = A->i[i+1] - A->i[i];
        PetscLogFlops(4*n+3);
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
      /* come up through the rows */
      for (i=m-1; i>-1; i--) {
        n    = A->i[i+1] - A->i[i];
        PetscLogFlops(4*n+3);
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
    ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);}
    ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    if (xx != bb) {
      ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
    } else {
      b = x;
    }
    ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
    xs = x + shift; /* shift by one for index start of 1 */
    ls = ls + shift;
    while (its--) {
      for (i=0; i<m; i++) {
        n    = A->i[i+1] - A->i[i];
        PetscLogFlops(4*n+3);
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
    ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);}
    ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    if (xx != bb) {
      ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
    } else {
      b = x;
    }
    ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
    xs = x + shift; /* shift by one for index start of 1 */
    ls = ls + shift;
    while (its--) {
      for (i=m-1; i>-1; i--) {
        n    = A->i[i+1] - A->i[i];
        PetscLogFlops(4*n+3);
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
    ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);}
    ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
  Mat        A = mat->A,B = mat->B;
  int        ierr;
  PetscReal  isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  info->columns_local     = (double)matin->N;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
  case MAT_USE_SINGLE_PRECISION_SOLVES:
    PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
    break;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetOwnershipRange_MPIAIJ"
int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->rstart;
  if (n) *n = mat->rend;
  PetscFunctionReturn(0);
}
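
/*
   MatGetRow_MPIAIJ below returns a row of the parallel matrix in global
   column numbering: entries of the diagonal block A are offset by cstart,
   entries of the off-diagonal block B are mapped through garray, and the two
   (already sorted) pieces are merged so that the returned column indices
   increase.
*/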
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int         i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  int         nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  int         *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    int        max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ  *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  int         ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
  PetscReal   sum = 0.0;
  PetscScalar *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      int       *jj,*garray = aij->garray;
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->m; j++) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ  *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ  *Aloc = (Mat_SeqAIJ*)a->A->data;
  int         ierr,shift = Aloc->indexshift;
  int         M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat         B;
  PetscScalar *array;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  }

  ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*)a->A->data;
  m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*)a->B->data;
  m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row  = a->rstart;
  ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr);
  ct   = cols;
  for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  ierr = PetscFree(ct);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
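
/*
   MatDiagonalScale below computes diag(ll) * mat * diag(rr). The left
   scaling applies directly to both local blocks, while right-scaling the
   off-diagonal block requires the ghost entries of rr; the scatter into
   aij->lvec is therefore begun first and the diagonal block is scaled while
   that communication is in flight.
*/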
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat        a = aij->A,b = aij->B;
  int        ierr,s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatPrintHelp_MPIAIJ"
int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPIAIJ"
int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
int MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat        a,b,c,d;
  PetscTruth flg;
  int        ierr;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIAIJ"
int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
  PetscTruth flg;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
  if (str != SAME_NONZERO_PATTERN || !flg) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       MatCopy() directly on the two parts.
       If need be, we could provide a copy more efficient than MatCopy_Basic()
       by first uncompressing the a->B matrices and then copying the
       submatrices. */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
int MatSetUpPreallocation_MPIAIJ(Mat A)
{
  int ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
EXTERN int MatGetSubMatrices_MPIAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
EXTERN int MatGetSubMatrix_MPIAIJ(Mat,IS,IS,int,MatReuse,Mat *);
#if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*);
#endif

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
       MatMultAdd_MPIAIJ,
       MatMultTranspose_MPIAIJ,
       MatMultTransposeAdd_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
       0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
       MatZeroRows_MPIAIJ,
#if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
       MatLUFactorSymbolic_MPIAIJ_TFS,
#else
       0,
#endif
       0,
       0,
       0,
       MatSetUpPreallocation_MPIAIJ,
       0,
       MatGetOwnershipRange_MPIAIJ,
       0,
       0,
       0,
       0,
       MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       0,
       MatGetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
       MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       0,
       0,
       MatGetSubMatrix_MPIAIJ,
       MatDestroy_MPIAIJ,
       MatView_MPIAIJ,
       MatGetPetscMaps_Petsc,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatSetColoring_MPIAIJ,
       MatSetValuesAdic_MPIAIJ,
       MatSetValuesAdifor_MPIAIJ
};

/* ----------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIAIJ"
int MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
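
/*
   MatStoreValues()/MatRetrieveValues() save and later restore the numerical
   values of a matrix whose nonzero structure is frozen; a typical (sketched)
   use is repeated assembly with an identical sparsity pattern:

     ierr = MatSetOption(mat,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);
     ... modify the values, e.g. shift the diagonal ...
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);

   For the parallel format these simply forward to the two sequential blocks.
*/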
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIAIJ"
int MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#include "petscpc.h"
EXTERN_C_BEGIN
EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
int MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  int        ierr,i,size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);

  ierr          = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data       = (void*)b;
  ierr          = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
  ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;
  B->mapping    = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);

  /* the information in the maps duplicates the information computed below;
     eventually we should remove the duplicate information that is not
     contained in the maps */
  ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPIAIJ",
                                           MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPIAIJ",
                                           MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIAIJ",
                                           MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
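
/*
   MatDuplicate_MPIAIJ below copies the parallel layout (ownership ranges,
   colmap, garray), duplicates the ghost vector and its scatter context, and
   duplicates the two sequential blocks; the numerical values are copied only
   when cpvalues is MAT_COPY_VALUES.
*/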
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  int        ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
  a    = (Mat_MPIAIJ*)mat->data;
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = matin->factor;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    int len;
    len  = oldmat->B->n;
    ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,len*sizeof(int));
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->Mvctx);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->B);
  ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
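/*
   A minimal usage sketch (not from the original source): duplicating an
   assembled MPIAIJ matrix A either with its values or with the nonzero
   structure only.

       Mat C;
       ierr = MatDuplicate(A,MAT_COPY_VALUES,&C);CHKERRQ(ierr);
       (pass MAT_DO_NOT_COPY_VALUES to copy only the nonzero pattern)
*/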
#include "petscsys.h"

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat         A;
  PetscScalar *vals,*svals;
  MPI_Comm    comm = ((PetscObject)viewer)->comm;
  MPI_Status  status;
  int         i,nz,ierr,j,rstart,rend,fd;
  int         header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int         *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int         tag = ((PetscObject)viewer)->tag,cend,cstart,n;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"wrong number of column indices in file");
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' parts and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"wrong number of matrix values in file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
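/*
   A minimal usage sketch (not from the original source) of loading an MPIAIJ
   matrix through the routine above. The file name is a placeholder, and the
   PETSC_BINARY_RDONLY mode flag is an assumption about the binary-viewer API
   of this generation of PETSc.

       PetscViewer viewer;
       Mat         A;
       ierr = PetscViewerBinaryOpen(comm,"matrix.dat",PETSC_BINARY_RDONLY,&viewer);CHKERRQ(ierr);
       ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
       ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/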
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ matrix
  on each process, and then the final result obtained by concatenating the
  local matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
{
  int         ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  int         *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend;
  Mat         *local,M,Mreuse;
  PetscScalar *vwork,*aa;
  MPI_Comm    comm = mat->comm;
  Mat_SeqAIJ  *aij;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of nonzeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      nlocal = n/size + ((n % size) > rank);
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ(1,"Local column sizes do not add up to total number of columns");
    }

    /* next, compute all the lengths */
    ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    int ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below
        directly, rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
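/*
   A minimal usage sketch (not from the original source) of extracting a
   parallel submatrix through the routine above; nloc and first are
   placeholders for each process's slice of rows/columns of interest.

       IS  isrow,iscol;
       Mat sub;
       ierr = ISCreateStride(comm,nloc,first,1,&isrow);CHKERRQ(ierr);
       ierr = ISCreateStride(comm,nloc,first,1,&iscol);CHKERRQ(ierr);
       ierr = MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
*/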
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates storage for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters 
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users' manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion of
   the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 nonzero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0, proc1, proc2 are 3, 3, 2 respectively.
   The 'n' parameters for proc0, proc1, proc2 are 3, 3, 2 respectively.
   The 'M','N' parameters are 8, 8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0, proc1, proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0, proc1, proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz, o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz, o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e., 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
{
  Mat_MPIAIJ *b;
  int        ierr,i;
  PetscTruth flg2;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
  if (!flg2) PetscFunctionReturn(0);
  B->preallocated = PETSC_TRUE;
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
  if (d_nnz) {
    for (i=0; i<B->m; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->m; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
    }
  }
  b = (Mat_MPIAIJ*)B->data;

  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->A);
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->B);

  PetscFunctionReturn(0);
}
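/*
   A minimal usage sketch (not from the original source) matching the proc1
   numbers in the manual page above (local rows 3..5 of the 8x8 example);
   the matrix A is assumed to have already been created with type MATMPIAIJ.
   Since per-row arrays are given, the scalar d_nz/o_nz arguments are ignored.

       int d_nnz[3] = {3,3,2},o_nnz[3] = {2,1,1};
       ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/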
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters 
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm  - MPI communicator
.  m     - number of local rows (or PETSC_DECIDE to have it calculated if M is given).
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n     - number of local columns (or PETSC_DECIDE to have it calculated if N is given).
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax. For square matrices n is
           almost always m.
.  M     - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N     - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   Output Parameter:
.  A - the matrix

   Notes:
   The m,n,M,N parameters specify the size of the matrix and its partitioning across
   processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor, then it must be used on all processors that share the object for
   that argument.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users' manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion of
   the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 nonzero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0, proc1, proc2 are 3, 3, 2 respectively.
   The 'n' parameters for proc0, proc1, proc2 are 3, 3, 2 respectively.
   The 'M','N' parameters are 8, 8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0, proc1, proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0, proc1, proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz, o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz, o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e., 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
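/*
   A minimal usage sketch (not from the original source): proc1's call when
   creating the 8x8 example matrix from the manual page above on 3 processes
   with exact per-row preallocation. The other ranks would pass their own
   m, n, d_nnz, and o_nnz values (3,3 and 2,2 local rows/columns respectively).

       Mat A;
       int d_nnz[3] = {3,3,2},o_nnz[3] = {2,1,1};
       ierr = MatCreateMPIAIJ(comm,3,3,PETSC_DETERMINE,PETSC_DETERMINE,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
*/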
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_LOCAL) {
    int        *allcolors,*colors,i;
    ISColoring ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    int        *colors,i,*larray;
    ISColoring ocoloring;

    /* set coloring for diagonal portion */
    ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      larray[i] = i + a->cstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"No support for ISColoringType %d",coloring->ctype);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
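/*
   A minimal usage sketch (not from the original source) of the
   MatMPIAIJGetSeqAIJ() accessor defined above: inspecting the diagonal (Ad)
   and off-diagonal (Ao) SeqAIJ blocks of an assembled MPIAIJ matrix A.
   Here garray[j] is the global column number of local column j of Ao.

       Mat Ad,Ao;
       int *garray;
       ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
*/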