1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/ 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/spops.h" 6 7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat); 8 EXTERN int DisAssemble_MPIAIJ(Mat); 9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode); 10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**); 11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**); 12 EXTERN int MatPrintHelp_SeqAIJ(Mat); 13 14 /* 15 Local utility routine that creates a mapping from the global column 16 number to the local number in the off-diagonal part of the local 17 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 18 a slightly higher hash table cost; without it it is not scalable (each processor 19 has an order N integer array but is fast to acess. 20 */ 21 #undef __FUNCT__ 22 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 23 int CreateColmap_MPIAIJ_Private(Mat mat) 24 { 25 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 26 int n = aij->B->n,i,ierr; 27 28 PetscFunctionBegin; 29 #if defined (PETSC_USE_CTABLE) 30 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 31 for (i=0; i<n; i++){ 32 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 33 } 34 #else 35 ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr); 36 PetscLogObjectMemory(mat,mat->N*sizeof(int)); 37 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr); 38 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 39 #endif 40 PetscFunctionReturn(0); 41 } 42 43 #define CHUNKSIZE 15 44 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 45 { \ 46 \ 47 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 48 rmax = aimax[row]; nrow = ailen[row]; \ 49 col1 = col - shift; \ 50 \ 51 low = 0; high = nrow; \ 52 while (high-low > 5) { \ 53 t = (low+high)/2; \ 54 if (rp[t] > col) high = t; \ 55 else low = t; \ 56 } \ 57 for (_i=low; _i<high; _i++) { \ 58 if (rp[_i] > col1) break; \ 59 if (rp[_i] == col1) { \ 60 if (addv == ADD_VALUES) ap[_i] += value; \ 61 else ap[_i] = value; \ 62 goto a_noinsert; \ 63 } \ 64 } \ 65 if (nonew == 1) goto a_noinsert; \ 66 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 67 if (nrow >= rmax) { \ 68 /* there is no extra room in row, therefore enlarge */ \ 69 int new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \ 70 PetscScalar *new_a; \ 71 \ 72 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 73 \ 74 /* malloc new storage space */ \ 75 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \ 76 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 77 new_j = (int*)(new_a + new_nz); \ 78 new_i = new_j + new_nz; \ 79 \ 80 /* copy over old data into new slots */ \ 81 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \ 82 for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 83 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 84 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 85 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 86 len*sizeof(int));CHKERRQ(ierr); \ 87 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 88 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 89 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 90 /* free up old matrix storage */ \ 91 \ 92 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 93 if 
(!a->singlemalloc) { \ 94 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 95 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 96 } \ 97 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 98 a->singlemalloc = PETSC_TRUE; \ 99 \ 100 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 101 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 102 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 103 a->maxnz += CHUNKSIZE; \ 104 a->reallocs++; \ 105 } \ 106 N = nrow++ - 1; a->nz++; \ 107 /* shift up all the later entries in this row */ \ 108 for (ii=N; ii>=_i; ii--) { \ 109 rp[ii+1] = rp[ii]; \ 110 ap[ii+1] = ap[ii]; \ 111 } \ 112 rp[_i] = col1; \ 113 ap[_i] = value; \ 114 a_noinsert: ; \ 115 ailen[row] = nrow; \ 116 } 117 118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 119 { \ 120 \ 121 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 122 rmax = bimax[row]; nrow = bilen[row]; \ 123 col1 = col - shift; \ 124 \ 125 low = 0; high = nrow; \ 126 while (high-low > 5) { \ 127 t = (low+high)/2; \ 128 if (rp[t] > col) high = t; \ 129 else low = t; \ 130 } \ 131 for (_i=low; _i<high; _i++) { \ 132 if (rp[_i] > col1) break; \ 133 if (rp[_i] == col1) { \ 134 if (addv == ADD_VALUES) ap[_i] += value; \ 135 else ap[_i] = value; \ 136 goto b_noinsert; \ 137 } \ 138 } \ 139 if (nonew == 1) goto b_noinsert; \ 140 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 141 if (nrow >= rmax) { \ 142 /* there is no extra room in row, therefore enlarge */ \ 143 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 144 PetscScalar *new_a; \ 145 \ 146 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 147 \ 148 /* malloc new storage space */ \ 149 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \ 150 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 151 new_j = (int*)(new_a + new_nz); \ 152 new_i = new_j + new_nz; \ 153 \ 154 /* copy over old data into new slots */ \ 155 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 156 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 157 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 158 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 159 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 160 len*sizeof(int));CHKERRQ(ierr); \ 161 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 162 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 163 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 164 /* free up old matrix storage */ \ 165 \ 166 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 167 if (!b->singlemalloc) { \ 168 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 169 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 170 } \ 171 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 172 b->singlemalloc = PETSC_TRUE; \ 173 \ 174 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 175 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 176 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 177 b->maxnz += CHUNKSIZE; \ 178 b->reallocs++; \ 179 } \ 180 N = nrow++ - 1; b->nz++; \ 181 /* shift up all the later entries in this row */ \ 182 for (ii=N; ii>=_i; ii--) { \ 183 rp[ii+1] = rp[ii]; \ 184 ap[ii+1] = ap[ii]; \ 185 } \ 186 rp[_i] = col1; \ 187 ap[_i] = value; \ 188 b_noinsert: ; \ 189 bilen[row] = nrow; \ 190 } 191 192 #undef __FUNCT__ 193 #define __FUNCT__ "MatSetValues_MPIAIJ" 194 int MatSetValues_MPIAIJ(Mat 
mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 195 { 196 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 197 PetscScalar value; 198 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 199 int cstart = aij->cstart,cend = aij->cend,row,col; 200 PetscTruth roworiented = aij->roworiented; 201 202 /* Some Variables required in the macro */ 203 Mat A = aij->A; 204 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 205 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 206 PetscScalar *aa = a->a; 207 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 208 Mat B = aij->B; 209 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 210 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 211 PetscScalar *ba = b->a; 212 213 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 214 int nonew = a->nonew,shift = a->indexshift; 215 PetscScalar *ap; 216 217 PetscFunctionBegin; 218 for (i=0; i<m; i++) { 219 if (im[i] < 0) continue; 220 #if defined(PETSC_USE_BOPT_g) 221 if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 222 #endif 223 if (im[i] >= rstart && im[i] < rend) { 224 row = im[i] - rstart; 225 for (j=0; j<n; j++) { 226 if (in[j] >= cstart && in[j] < cend){ 227 col = in[j] - cstart; 228 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 229 if (ignorezeroentries && value == 0.0) continue; 230 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 231 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 232 } else if (in[j] < 0) continue; 233 #if defined(PETSC_USE_BOPT_g) 234 else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");} 235 #endif 236 else { 237 if (mat->was_assembled) { 238 if (!aij->colmap) { 239 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 240 } 241 #if defined (PETSC_USE_CTABLE) 242 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 243 col--; 244 #else 245 col = aij->colmap[in[j]] - 1; 246 #endif 247 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 248 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 249 col = in[j]; 250 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 251 B = aij->B; 252 b = (Mat_SeqAIJ*)B->data; 253 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 254 ba = b->a; 255 } 256 } else col = in[j]; 257 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 258 if (ignorezeroentries && value == 0.0) continue; 259 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 260 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 261 } 262 } 263 } else { 264 if (!aij->donotstash) { 265 if (roworiented) { 266 if (ignorezeroentries && v[i*n] == 0.0) continue; 267 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 268 } else { 269 if (ignorezeroentries && v[i] == 0.0) continue; 270 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 271 } 272 } 273 } 274 } 275 PetscFunctionReturn(0); 276 } 277 278 #undef __FUNCT__ 279 #define __FUNCT__ "MatGetValues_MPIAIJ" 280 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v) 281 { 282 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 283 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 284 int cstart = aij->cstart,cend = aij->cend,row,col; 285 286 PetscFunctionBegin; 287 for (i=0; i<m; i++) { 288 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 289 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 
290 if (idxm[i] >= rstart && idxm[i] < rend) { 291 row = idxm[i] - rstart; 292 for (j=0; j<n; j++) { 293 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 294 if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 295 if (idxn[j] >= cstart && idxn[j] < cend){ 296 col = idxn[j] - cstart; 297 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 298 } else { 299 if (!aij->colmap) { 300 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 301 } 302 #if defined (PETSC_USE_CTABLE) 303 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 304 col --; 305 #else 306 col = aij->colmap[idxn[j]] - 1; 307 #endif 308 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 309 else { 310 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 311 } 312 } 313 } 314 } else { 315 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 316 } 317 } 318 PetscFunctionReturn(0); 319 } 320 321 #undef __FUNCT__ 322 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 323 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 324 { 325 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 326 int ierr,nstash,reallocs; 327 InsertMode addv; 328 329 PetscFunctionBegin; 330 if (aij->donotstash) { 331 PetscFunctionReturn(0); 332 } 333 334 /* make sure all processors are either in INSERTMODE or ADDMODE */ 335 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 336 if (addv == (ADD_VALUES|INSERT_VALUES)) { 337 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 338 } 339 mat->insertmode = addv; /* in case this processor had no cache */ 340 341 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 342 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 343 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 344 PetscFunctionReturn(0); 345 } 346 347 348 #undef __FUNCT__ 349 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 350 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 351 { 352 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 353 int i,j,rstart,ncols,n,ierr,flg; 354 int *row,*col,other_disassembled; 355 PetscScalar *val; 356 InsertMode addv = mat->insertmode; 357 358 PetscFunctionBegin; 359 if (!aij->donotstash) { 360 while (1) { 361 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 362 if (!flg) break; 363 364 for (i=0; i<n;) { 365 /* Now identify the consecutive vals belonging to the same row */ 366 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 367 if (j < n) ncols = j-i; 368 else ncols = n-i; 369 /* Now assemble all these values with a single function call */ 370 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 371 i = j; 372 } 373 } 374 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 375 } 376 377 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 378 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 379 380 /* determine if any processor has disassembled, if so we must 381 also disassemble ourselfs, in order that we may reassemble. 
*/ 382 /* 383 if nonzero structure of submatrix B cannot change then we know that 384 no processor disassembled thus we can skip this stuff 385 */ 386 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 387 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 388 if (mat->was_assembled && !other_disassembled) { 389 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 390 } 391 } 392 393 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 394 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 395 } 396 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 397 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 398 399 if (aij->rowvalues) { 400 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 401 aij->rowvalues = 0; 402 } 403 PetscFunctionReturn(0); 404 } 405 406 #undef __FUNCT__ 407 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 408 int MatZeroEntries_MPIAIJ(Mat A) 409 { 410 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 411 int ierr; 412 413 PetscFunctionBegin; 414 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 415 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 416 PetscFunctionReturn(0); 417 } 418 419 #undef __FUNCT__ 420 #define __FUNCT__ "MatZeroRows_MPIAIJ" 421 int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag) 422 { 423 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 424 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 425 int *procs,*nprocs,j,idx,nsends,*work,row; 426 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 427 int *rvalues,tag = A->tag,count,base,slen,n,*source; 428 int *lens,imdex,*lrows,*values,rstart=l->rstart; 429 MPI_Comm comm = A->comm; 430 MPI_Request *send_waits,*recv_waits; 431 MPI_Status recv_status,*send_status; 432 IS istmp; 433 PetscTruth found; 434 435 PetscFunctionBegin; 436 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 437 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 438 439 /* first count number of contributors to each processor */ 440 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 441 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 442 procs = nprocs + size; 443 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 444 for (i=0; i<N; i++) { 445 idx = rows[i]; 446 found = PETSC_FALSE; 447 for (j=0; j<size; j++) { 448 if (idx >= owners[j] && idx < owners[j+1]) { 449 nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break; 450 } 451 } 452 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 453 } 454 nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];} 455 456 /* inform other processors of number of messages and max length*/ 457 ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr); 458 ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 459 nrecvs = work[size+rank]; 460 nmax = work[rank]; 461 ierr = PetscFree(work);CHKERRQ(ierr); 462 463 /* post receives: */ 464 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 465 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 466 for (i=0; i<nrecvs; i++) { 467 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 468 } 469 470 /* do sends: 471 1) starts[i] gives the starting index in svalues for stuff going to 472 the ith processor 473 */ 474 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 475 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 476 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 477 starts[0] = 0; 478 for (i=1; 
i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 479 for (i=0; i<N; i++) { 480 svalues[starts[owner[i]]++] = rows[i]; 481 } 482 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 483 484 starts[0] = 0; 485 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 486 count = 0; 487 for (i=0; i<size; i++) { 488 if (procs[i]) { 489 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 490 } 491 } 492 ierr = PetscFree(starts);CHKERRQ(ierr); 493 494 base = owners[rank]; 495 496 /* wait on receives */ 497 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 498 source = lens + nrecvs; 499 count = nrecvs; slen = 0; 500 while (count) { 501 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 502 /* unpack receives into our local space */ 503 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 504 source[imdex] = recv_status.MPI_SOURCE; 505 lens[imdex] = n; 506 slen += n; 507 count--; 508 } 509 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 510 511 /* move the data into the send scatter */ 512 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 513 count = 0; 514 for (i=0; i<nrecvs; i++) { 515 values = rvalues + i*nmax; 516 for (j=0; j<lens[i]; j++) { 517 lrows[count++] = values[j] - base; 518 } 519 } 520 ierr = PetscFree(rvalues);CHKERRQ(ierr); 521 ierr = PetscFree(lens);CHKERRQ(ierr); 522 ierr = PetscFree(owner);CHKERRQ(ierr); 523 ierr = PetscFree(nprocs);CHKERRQ(ierr); 524 525 /* actually zap the local rows */ 526 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 527 PetscLogObjectParent(A,istmp); 528 529 /* 530 Zero the required rows. If the "diagonal block" of the matrix 531 is square and the user wishes to set the diagonal we use seperate 532 code so that MatSetValues() is not called for each diagonal allocating 533 new memory, thus calling lots of mallocs and slowing things down. 
534 535 Contributed by: Mathew Knepley 536 */ 537 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 538 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 539 if (diag && (l->A->M == l->A->N)) { 540 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 541 } else if (diag) { 542 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 543 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 544 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 545 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 546 } 547 for (i = 0; i < slen; i++) { 548 row = lrows[i] + rstart; 549 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 550 } 551 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 552 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 553 } else { 554 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 555 } 556 ierr = ISDestroy(istmp);CHKERRQ(ierr); 557 ierr = PetscFree(lrows);CHKERRQ(ierr); 558 559 /* wait on sends */ 560 if (nsends) { 561 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 562 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 563 ierr = PetscFree(send_status);CHKERRQ(ierr); 564 } 565 ierr = PetscFree(send_waits);CHKERRQ(ierr); 566 ierr = PetscFree(svalues);CHKERRQ(ierr); 567 568 PetscFunctionReturn(0); 569 } 570 571 #undef __FUNCT__ 572 #define __FUNCT__ "MatMult_MPIAIJ" 573 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 574 { 575 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 576 int ierr,nt; 577 578 PetscFunctionBegin; 579 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 580 if (nt != A->n) { 581 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt); 582 } 583 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 584 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 585 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 586 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 587 PetscFunctionReturn(0); 588 } 589 590 #undef __FUNCT__ 591 #define __FUNCT__ "MatMultAdd_MPIAIJ" 592 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 593 { 594 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 595 int ierr; 596 597 PetscFunctionBegin; 598 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 599 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 600 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 601 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 602 PetscFunctionReturn(0); 603 } 604 605 #undef __FUNCT__ 606 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 607 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 608 { 609 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 610 int ierr; 611 612 PetscFunctionBegin; 613 /* do nondiagonal part */ 614 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 615 /* send it on its way */ 616 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 617 /* do local part */ 618 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 619 /* receive remote parts: note this assumes the values are not actually */ 620 /* inserted in yy until the next line, which is true for my implementation*/ 621 /* but is not perhaps always true. 
*/ 622 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 623 PetscFunctionReturn(0); 624 } 625 626 #undef __FUNCT__ 627 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 628 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 629 { 630 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 631 int ierr; 632 633 PetscFunctionBegin; 634 /* do nondiagonal part */ 635 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 636 /* send it on its way */ 637 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 638 /* do local part */ 639 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 640 /* receive remote parts: note this assumes the values are not actually */ 641 /* inserted in yy until the next line, which is true for my implementation*/ 642 /* but is not perhaps always true. */ 643 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 644 PetscFunctionReturn(0); 645 } 646 647 /* 648 This only works correctly for square matrices where the subblock A->A is the 649 diagonal block 650 */ 651 #undef __FUNCT__ 652 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 653 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 654 { 655 int ierr; 656 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 657 658 PetscFunctionBegin; 659 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 660 if (a->rstart != a->cstart || a->rend != a->cend) { 661 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 662 } 663 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 664 PetscFunctionReturn(0); 665 } 666 667 #undef __FUNCT__ 668 #define __FUNCT__ "MatScale_MPIAIJ" 669 int MatScale_MPIAIJ(PetscScalar *aa,Mat A) 670 { 671 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 672 int ierr; 673 674 PetscFunctionBegin; 675 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 676 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 677 PetscFunctionReturn(0); 678 } 679 680 #undef __FUNCT__ 681 #define __FUNCT__ "MatDestroy_MPIAIJ" 682 int MatDestroy_MPIAIJ(Mat mat) 683 { 684 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 685 int ierr; 686 687 PetscFunctionBegin; 688 #if defined(PETSC_USE_LOG) 689 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 690 #endif 691 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 692 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 693 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 694 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 695 #if defined (PETSC_USE_CTABLE) 696 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 697 #else 698 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 699 #endif 700 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 701 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 702 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 703 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 704 ierr = PetscFree(aij);CHKERRQ(ierr); 705 PetscFunctionReturn(0); 706 } 707 708 #undef __FUNCT__ 709 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 710 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 711 { 712 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 713 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 714 int ierr,shift = C->indexshift,rank = aij->rank,size = aij->size; 715 PetscTruth isdraw,isascii,flg; 716 PetscViewer sviewer; 717 PetscViewerFormat format; 718 719 PetscFunctionBegin; 720 ierr = 
PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 721 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 722 if (isascii) { 723 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 724 if (format == PETSC_VIEWER_ASCII_INFO_LONG) { 725 MatInfo info; 726 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 727 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 728 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 729 if (flg) { 730 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 731 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 732 } else { 733 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 734 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 735 } 736 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 737 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 738 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 739 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 740 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 741 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 742 PetscFunctionReturn(0); 743 } else if (format == PETSC_VIEWER_ASCII_INFO) { 744 PetscFunctionReturn(0); 745 } 746 } else if (isdraw) { 747 PetscDraw draw; 748 PetscTruth isnull; 749 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 750 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 751 } 752 753 if (size == 1) { 754 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 755 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 756 } else { 757 /* assemble the entire matrix onto first processor. 
*/ 758 Mat A; 759 Mat_SeqAIJ *Aloc; 760 int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 761 PetscScalar *a; 762 763 if (!rank) { 764 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 765 } else { 766 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 767 } 768 PetscLogObjectParent(mat,A); 769 770 /* copy over the A part */ 771 Aloc = (Mat_SeqAIJ*)aij->A->data; 772 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 773 row = aij->rstart; 774 for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;} 775 for (i=0; i<m; i++) { 776 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 777 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 778 } 779 aj = Aloc->j; 780 for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;} 781 782 /* copy over the B part */ 783 Aloc = (Mat_SeqAIJ*)aij->B->data; 784 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 785 row = aij->rstart; 786 ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr); 787 ct = cols; 788 for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];} 789 for (i=0; i<m; i++) { 790 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 791 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 792 } 793 ierr = PetscFree(ct);CHKERRQ(ierr); 794 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 795 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 796 /* 797 Everyone has to call to draw the matrix since the graphics waits are 798 synchronized across all processors that share the PetscDraw object 799 */ 800 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 801 if (!rank) { 802 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 803 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 804 } 805 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 806 ierr = MatDestroy(A);CHKERRQ(ierr); 807 } 808 PetscFunctionReturn(0); 809 } 810 811 #undef __FUNCT__ 812 #define __FUNCT__ "MatView_MPIAIJ" 813 int MatView_MPIAIJ(Mat mat,PetscViewer viewer) 814 { 815 int ierr; 816 PetscTruth isascii,isdraw,issocket,isbinary; 817 818 PetscFunctionBegin; 819 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 820 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 821 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 822 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 823 if (isascii || isdraw || isbinary || issocket) { 824 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 825 } else { 826 SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 827 } 828 PetscFunctionReturn(0); 829 } 830 831 /* 832 This has to provide several versions. 833 834 2) a) use only local smoothing updating outer values only once. 835 b) local smoothing updating outer values each inner iteration 836 3) color updating out values betwen colors. 
837 */ 838 #undef __FUNCT__ 839 #define __FUNCT__ "MatRelax_MPIAIJ" 840 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,Vec xx) 841 { 842 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 843 Mat AA = mat->A,BB = mat->B; 844 Mat_SeqAIJ *A = (Mat_SeqAIJ*)AA->data,*B = (Mat_SeqAIJ *)BB->data; 845 PetscScalar *b,*x,*xs,*ls,d,*v,sum; 846 int ierr,*idx,*diag; 847 int n = matin->n,m = matin->m,i,shift = A->indexshift; 848 849 PetscFunctionBegin; 850 if (!A->diag) {ierr = MatMarkDiagonal_SeqAIJ(AA);CHKERRQ(ierr);} 851 diag = A->diag; 852 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 853 if (flag & SOR_ZERO_INITIAL_GUESS) { 854 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 855 PetscFunctionReturn(0); 856 } 857 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 858 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 859 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 860 if (xx != bb) { 861 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 862 } else { 863 b = x; 864 } 865 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 866 xs = x + shift; /* shift by one for index start of 1 */ 867 ls = ls + shift; 868 while (its--) { 869 /* go down through the rows */ 870 for (i=0; i<m; i++) { 871 n = A->i[i+1] - A->i[i]; 872 PetscLogFlops(4*n+3); 873 idx = A->j + A->i[i] + shift; 874 v = A->a + A->i[i] + shift; 875 sum = b[i]; 876 SPARSEDENSEMDOT(sum,xs,v,idx,n); 877 d = fshift + A->a[diag[i]+shift]; 878 n = B->i[i+1] - B->i[i]; 879 idx = B->j + B->i[i] + shift; 880 v = B->a + B->i[i] + shift; 881 SPARSEDENSEMDOT(sum,ls,v,idx,n); 882 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 883 } 884 /* come up through the rows */ 885 for (i=m-1; i>-1; i--) { 886 n = A->i[i+1] - A->i[i]; 887 PetscLogFlops(4*n+3); 888 idx = A->j + A->i[i] + shift; 889 v = A->a + A->i[i] + shift; 890 sum = b[i]; 891 SPARSEDENSEMDOT(sum,xs,v,idx,n); 892 d = fshift + A->a[diag[i]+shift]; 893 n = B->i[i+1] - B->i[i]; 894 idx = B->j + B->i[i] + shift; 895 v = B->a + B->i[i] + shift; 896 SPARSEDENSEMDOT(sum,ls,v,idx,n); 897 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 898 } 899 } 900 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 901 if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); } 902 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 903 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 904 if (flag & SOR_ZERO_INITIAL_GUESS) { 905 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 906 PetscFunctionReturn(0); 907 } 908 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 909 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 910 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 911 if (xx != bb) { 912 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 913 } else { 914 b = x; 915 } 916 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 917 xs = x + shift; /* shift by one for index start of 1 */ 918 ls = ls + shift; 919 while (its--) { 920 for (i=0; i<m; i++) { 921 n = A->i[i+1] - A->i[i]; 922 PetscLogFlops(4*n+3); 923 idx = A->j + A->i[i] + shift; 924 v = A->a + A->i[i] + shift; 925 sum = b[i]; 926 SPARSEDENSEMDOT(sum,xs,v,idx,n); 927 d = fshift + A->a[diag[i]+shift]; 928 n = B->i[i+1] - B->i[i]; 929 idx = B->j + B->i[i] + shift; 930 v = B->a + B->i[i] + shift; 931 SPARSEDENSEMDOT(sum,ls,v,idx,n); 932 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 933 } 934 } 935 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 936 if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); } 937 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 938 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 939 if (flag & SOR_ZERO_INITIAL_GUESS) { 940 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 941 PetscFunctionReturn(0); 942 } 943 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 944 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 945 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 946 if (xx != bb) { 947 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 948 } else { 949 b = x; 950 } 951 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 952 xs = x + shift; /* shift by one for index start of 1 */ 953 ls = ls + shift; 954 while (its--) { 955 for (i=m-1; i>-1; i--) { 956 n = A->i[i+1] - A->i[i]; 957 PetscLogFlops(4*n+3); 958 idx = A->j + A->i[i] + shift; 959 v = A->a + A->i[i] + shift; 960 sum = b[i]; 961 SPARSEDENSEMDOT(sum,xs,v,idx,n); 962 d = fshift + A->a[diag[i]+shift]; 963 n = B->i[i+1] - B->i[i]; 964 idx = B->j + B->i[i] + shift; 965 v = B->a + B->i[i] + shift; 966 SPARSEDENSEMDOT(sum,ls,v,idx,n); 967 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 968 } 969 } 970 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 971 if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); } 972 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 973 } else { 974 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 975 } 976 PetscFunctionReturn(0); 977 } 978 979 #undef __FUNCT__ 980 #define __FUNCT__ "MatGetInfo_MPIAIJ" 981 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 982 { 983 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 984 Mat A = mat->A,B = mat->B; 985 int ierr; 986 PetscReal isend[5],irecv[5]; 987 988 PetscFunctionBegin; 989 info->block_size = 1.0; 990 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 991 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 992 isend[3] = info->memory; isend[4] = info->mallocs; 993 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 994 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 995 isend[3] += info->memory; isend[4] += info->mallocs; 996 if (flag == MAT_LOCAL) { 997 info->nz_used = isend[0]; 998 info->nz_allocated = isend[1]; 999 info->nz_unneeded = isend[2]; 1000 info->memory = isend[3]; 1001 info->mallocs = isend[4]; 1002 } else if (flag == MAT_GLOBAL_MAX) { 1003 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 1004 info->nz_used = irecv[0]; 1005 info->nz_allocated = irecv[1]; 1006 info->nz_unneeded = irecv[2]; 1007 info->memory = irecv[3]; 1008 info->mallocs = irecv[4]; 1009 } else if (flag == MAT_GLOBAL_SUM) { 1010 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 1011 info->nz_used = irecv[0]; 1012 info->nz_allocated = irecv[1]; 1013 info->nz_unneeded = irecv[2]; 1014 info->memory = irecv[3]; 1015 info->mallocs = irecv[4]; 1016 } 1017 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1018 info->fill_ratio_needed = 0; 1019 info->factor_mallocs = 0; 1020 info->rows_global = (double)matin->M; 1021 info->columns_global = (double)matin->N; 1022 info->rows_local = (double)matin->m; 1023 info->columns_local = (double)matin->N; 1024 1025 PetscFunctionReturn(0); 1026 } 1027 1028 #undef 
__FUNCT__ 1029 #define __FUNCT__ "MatSetOption_MPIAIJ" 1030 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1031 { 1032 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1033 int ierr; 1034 1035 PetscFunctionBegin; 1036 switch (op) { 1037 case MAT_NO_NEW_NONZERO_LOCATIONS: 1038 case MAT_YES_NEW_NONZERO_LOCATIONS: 1039 case MAT_COLUMNS_UNSORTED: 1040 case MAT_COLUMNS_SORTED: 1041 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1042 case MAT_KEEP_ZEROED_ROWS: 1043 case MAT_NEW_NONZERO_LOCATION_ERR: 1044 case MAT_USE_INODES: 1045 case MAT_DO_NOT_USE_INODES: 1046 case MAT_IGNORE_ZERO_ENTRIES: 1047 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1048 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1049 break; 1050 case MAT_ROW_ORIENTED: 1051 a->roworiented = PETSC_TRUE; 1052 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1053 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1054 break; 1055 case MAT_ROWS_SORTED: 1056 case MAT_ROWS_UNSORTED: 1057 case MAT_YES_NEW_DIAGONALS: 1058 case MAT_USE_SINGLE_PRECISION_SOLVES: 1059 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1060 break; 1061 case MAT_COLUMN_ORIENTED: 1062 a->roworiented = PETSC_FALSE; 1063 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1064 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1065 break; 1066 case MAT_IGNORE_OFF_PROC_ENTRIES: 1067 a->donotstash = PETSC_TRUE; 1068 break; 1069 case MAT_NO_NEW_DIAGONALS: 1070 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1071 break; 1072 default: 1073 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1074 break; 1075 } 1076 PetscFunctionReturn(0); 1077 } 1078 1079 #undef __FUNCT__ 1080 #define __FUNCT__ "MatGetRow_MPIAIJ" 1081 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1082 { 1083 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1084 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1085 int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1086 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1087 int *cmap,*idx_p; 1088 1089 PetscFunctionBegin; 1090 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1091 mat->getrowactive = PETSC_TRUE; 1092 1093 if (!mat->rowvalues && (idx || v)) { 1094 /* 1095 allocate enough space to hold information from the longest row. 
1096 */ 1097 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1098 int max = 1,tmp; 1099 for (i=0; i<matin->m; i++) { 1100 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1101 if (max < tmp) { max = tmp; } 1102 } 1103 ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1104 mat->rowindices = (int*)(mat->rowvalues + max); 1105 } 1106 1107 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1108 lrow = row - rstart; 1109 1110 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1111 if (!v) {pvA = 0; pvB = 0;} 1112 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1113 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1114 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1115 nztot = nzA + nzB; 1116 1117 cmap = mat->garray; 1118 if (v || idx) { 1119 if (nztot) { 1120 /* Sort by increasing column numbers, assuming A and B already sorted */ 1121 int imark = -1; 1122 if (v) { 1123 *v = v_p = mat->rowvalues; 1124 for (i=0; i<nzB; i++) { 1125 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1126 else break; 1127 } 1128 imark = i; 1129 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1130 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1131 } 1132 if (idx) { 1133 *idx = idx_p = mat->rowindices; 1134 if (imark > -1) { 1135 for (i=0; i<imark; i++) { 1136 idx_p[i] = cmap[cworkB[i]]; 1137 } 1138 } else { 1139 for (i=0; i<nzB; i++) { 1140 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1141 else break; 1142 } 1143 imark = i; 1144 } 1145 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1146 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1147 } 1148 } else { 1149 if (idx) *idx = 0; 1150 if (v) *v = 0; 1151 } 1152 } 1153 *nz = nztot; 1154 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1155 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1156 PetscFunctionReturn(0); 1157 } 1158 1159 #undef __FUNCT__ 1160 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1161 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1162 { 1163 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1164 1165 PetscFunctionBegin; 1166 if (aij->getrowactive == PETSC_FALSE) { 1167 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1168 } 1169 aij->getrowactive = PETSC_FALSE; 1170 PetscFunctionReturn(0); 1171 } 1172 1173 #undef __FUNCT__ 1174 #define __FUNCT__ "MatNorm_MPIAIJ" 1175 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1176 { 1177 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1178 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1179 int ierr,i,j,cstart = aij->cstart,shift = amat->indexshift; 1180 PetscReal sum = 0.0; 1181 PetscScalar *v; 1182 1183 PetscFunctionBegin; 1184 if (aij->size == 1) { 1185 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1186 } else { 1187 if (type == NORM_FROBENIUS) { 1188 v = amat->a; 1189 for (i=0; i<amat->nz; i++) { 1190 #if defined(PETSC_USE_COMPLEX) 1191 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1192 #else 1193 sum += (*v)*(*v); v++; 1194 #endif 1195 } 1196 v = bmat->a; 1197 for (i=0; i<bmat->nz; i++) { 1198 #if defined(PETSC_USE_COMPLEX) 1199 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1200 #else 1201 sum += (*v)*(*v); v++; 1202 #endif 1203 } 1204 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1205 *norm = sqrt(*norm); 1206 } else if (type == NORM_1) { /* max column norm */ 1207 
PetscReal *tmp,*tmp2; 1208 int *jj,*garray = aij->garray; 1209 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1210 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1211 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1212 *norm = 0.0; 1213 v = amat->a; jj = amat->j; 1214 for (j=0; j<amat->nz; j++) { 1215 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1216 } 1217 v = bmat->a; jj = bmat->j; 1218 for (j=0; j<bmat->nz; j++) { 1219 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1220 } 1221 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1222 for (j=0; j<mat->N; j++) { 1223 if (tmp2[j] > *norm) *norm = tmp2[j]; 1224 } 1225 ierr = PetscFree(tmp);CHKERRQ(ierr); 1226 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1227 } else if (type == NORM_INFINITY) { /* max row norm */ 1228 PetscReal ntemp = 0.0; 1229 for (j=0; j<aij->A->m; j++) { 1230 v = amat->a + amat->i[j] + shift; 1231 sum = 0.0; 1232 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1233 sum += PetscAbsScalar(*v); v++; 1234 } 1235 v = bmat->a + bmat->i[j] + shift; 1236 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1237 sum += PetscAbsScalar(*v); v++; 1238 } 1239 if (sum > ntemp) ntemp = sum; 1240 } 1241 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1242 } else { 1243 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1244 } 1245 } 1246 PetscFunctionReturn(0); 1247 } 1248 1249 #undef __FUNCT__ 1250 #define __FUNCT__ "MatTranspose_MPIAIJ" 1251 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1252 { 1253 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1254 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1255 int ierr,shift = Aloc->indexshift; 1256 int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1257 Mat B; 1258 PetscScalar *array; 1259 1260 PetscFunctionBegin; 1261 if (!matout && M != N) { 1262 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1263 } 1264 1265 ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1266 1267 /* copy over the A part */ 1268 Aloc = (Mat_SeqAIJ*)a->A->data; 1269 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1270 row = a->rstart; 1271 for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;} 1272 for (i=0; i<m; i++) { 1273 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1274 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1275 } 1276 aj = Aloc->j; 1277 for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;} 1278 1279 /* copy over the B part */ 1280 Aloc = (Mat_SeqAIJ*)a->B->data; 1281 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1282 row = a->rstart; 1283 ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr); 1284 ct = cols; 1285 for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];} 1286 for (i=0; i<m; i++) { 1287 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1288 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1289 } 1290 ierr = PetscFree(ct);CHKERRQ(ierr); 1291 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1292 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1293 if (matout) { 1294 *matout = B; 1295 } else { 1296 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1297 } 1298 PetscFunctionReturn(0); 1299 } 1300 1301 #undef __FUNCT__ 1302 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1303 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1304 { 1305 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1306 Mat a = aij->A,b 
= aij->B; 1307 int ierr,s1,s2,s3; 1308 1309 PetscFunctionBegin; 1310 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1311 if (rr) { 1312 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1313 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1314 /* Overlap communication with computation. */ 1315 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1316 } 1317 if (ll) { 1318 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1319 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1320 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1321 } 1322 /* scale the diagonal block */ 1323 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1324 1325 if (rr) { 1326 /* Do a scatter end and then right scale the off-diagonal block */ 1327 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1328 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1329 } 1330 1331 PetscFunctionReturn(0); 1332 } 1333 1334 1335 #undef __FUNCT__ 1336 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1337 int MatPrintHelp_MPIAIJ(Mat A) 1338 { 1339 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1340 int ierr; 1341 1342 PetscFunctionBegin; 1343 if (!a->rank) { 1344 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1345 } 1346 PetscFunctionReturn(0); 1347 } 1348 1349 #undef __FUNCT__ 1350 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1351 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1352 { 1353 PetscFunctionBegin; 1354 *bs = 1; 1355 PetscFunctionReturn(0); 1356 } 1357 #undef __FUNCT__ 1358 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1359 int MatSetUnfactored_MPIAIJ(Mat A) 1360 { 1361 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1362 int ierr; 1363 1364 PetscFunctionBegin; 1365 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1366 PetscFunctionReturn(0); 1367 } 1368 1369 #undef __FUNCT__ 1370 #define __FUNCT__ "MatEqual_MPIAIJ" 1371 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1372 { 1373 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1374 Mat a,b,c,d; 1375 PetscTruth flg; 1376 int ierr; 1377 1378 PetscFunctionBegin; 1379 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1380 if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type"); 1381 a = matA->A; b = matA->B; 1382 c = matB->A; d = matB->B; 1383 1384 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1385 if (flg == PETSC_TRUE) { 1386 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1387 } 1388 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1389 PetscFunctionReturn(0); 1390 } 1391 1392 #undef __FUNCT__ 1393 #define __FUNCT__ "MatCopy_MPIAIJ" 1394 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1395 { 1396 int ierr; 1397 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1398 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1399 PetscTruth flg; 1400 1401 PetscFunctionBegin; 1402 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1403 if (str != SAME_NONZERO_PATTERN || !flg) { 1404 /* because of the column compression in the off-processor part of the matrix a->B, 1405 the number of columns in a->B and b->B may be different, hence we cannot call 1406 the MatCopy() directly on the two parts. 
If need be, we can provide a more 1407 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1408 then copying the submatrices */ 1409 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1410 } else { 1411 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1412 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1413 } 1414 PetscFunctionReturn(0); 1415 } 1416 1417 #undef __FUNCT__ 1418 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1419 int MatSetUpPreallocation_MPIAIJ(Mat A) 1420 { 1421 int ierr; 1422 1423 PetscFunctionBegin; 1424 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1425 PetscFunctionReturn(0); 1426 } 1427 1428 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1429 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int); 1430 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1431 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **); 1432 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *); 1433 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1434 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*); 1435 #endif 1436 1437 /* -------------------------------------------------------------------*/ 1438 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1439 MatGetRow_MPIAIJ, 1440 MatRestoreRow_MPIAIJ, 1441 MatMult_MPIAIJ, 1442 MatMultAdd_MPIAIJ, 1443 MatMultTranspose_MPIAIJ, 1444 MatMultTransposeAdd_MPIAIJ, 1445 0, 1446 0, 1447 0, 1448 0, 1449 0, 1450 0, 1451 MatRelax_MPIAIJ, 1452 MatTranspose_MPIAIJ, 1453 MatGetInfo_MPIAIJ, 1454 MatEqual_MPIAIJ, 1455 MatGetDiagonal_MPIAIJ, 1456 MatDiagonalScale_MPIAIJ, 1457 MatNorm_MPIAIJ, 1458 MatAssemblyBegin_MPIAIJ, 1459 MatAssemblyEnd_MPIAIJ, 1460 0, 1461 MatSetOption_MPIAIJ, 1462 MatZeroEntries_MPIAIJ, 1463 MatZeroRows_MPIAIJ, 1464 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1465 MatLUFactorSymbolic_MPIAIJ_TFS, 1466 #else 1467 0, 1468 #endif 1469 0, 1470 0, 1471 0, 1472 MatSetUpPreallocation_MPIAIJ, 1473 0, 1474 0, 1475 0, 1476 0, 1477 0, 1478 MatDuplicate_MPIAIJ, 1479 0, 1480 0, 1481 0, 1482 0, 1483 0, 1484 MatGetSubMatrices_MPIAIJ, 1485 MatIncreaseOverlap_MPIAIJ, 1486 MatGetValues_MPIAIJ, 1487 MatCopy_MPIAIJ, 1488 MatPrintHelp_MPIAIJ, 1489 MatScale_MPIAIJ, 1490 0, 1491 0, 1492 0, 1493 MatGetBlockSize_MPIAIJ, 1494 0, 1495 0, 1496 0, 1497 0, 1498 MatFDColoringCreate_MPIAIJ, 1499 0, 1500 MatSetUnfactored_MPIAIJ, 1501 0, 1502 0, 1503 MatGetSubMatrix_MPIAIJ, 1504 MatDestroy_MPIAIJ, 1505 MatView_MPIAIJ, 1506 MatGetPetscMaps_Petsc, 1507 0, 1508 0, 1509 0, 1510 0, 1511 0, 1512 0, 1513 0, 1514 0, 1515 MatSetColoring_MPIAIJ, 1516 MatSetValuesAdic_MPIAIJ, 1517 MatSetValuesAdifor_MPIAIJ 1518 }; 1519 1520 /* ----------------------------------------------------------------------------------------*/ 1521 1522 EXTERN_C_BEGIN 1523 #undef __FUNCT__ 1524 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1525 int MatStoreValues_MPIAIJ(Mat mat) 1526 { 1527 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1528 int ierr; 1529 1530 PetscFunctionBegin; 1531 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1532 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1533 PetscFunctionReturn(0); 1534 } 1535 EXTERN_C_END 1536 1537 EXTERN_C_BEGIN 1538 #undef __FUNCT__ 1539 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1540 int MatRetrieveValues_MPIAIJ(Mat mat) 1541 { 1542 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1543 int ierr; 1544 1545 PetscFunctionBegin; 1546 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1547 ierr = 
MatRetrieveValues(aij->B);CHKERRQ(ierr); 1548 PetscFunctionReturn(0); 1549 } 1550 EXTERN_C_END 1551 1552 #include "petscpc.h" 1553 EXTERN_C_BEGIN 1554 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *); 1555 EXTERN_C_END 1556 1557 EXTERN_C_BEGIN 1558 #undef __FUNCT__ 1559 #define __FUNCT__ "MatCreate_MPIAIJ" 1560 int MatCreate_MPIAIJ(Mat B) 1561 { 1562 Mat_MPIAIJ *b; 1563 int ierr,i,size; 1564 1565 PetscFunctionBegin; 1566 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1567 1568 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1569 B->data = (void*)b; 1570 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1571 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1572 B->factor = 0; 1573 B->assembled = PETSC_FALSE; 1574 B->mapping = 0; 1575 1576 B->insertmode = NOT_SET_VALUES; 1577 b->size = size; 1578 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1579 1580 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1581 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1582 1583 /* the information in the maps duplicates the information computed below, eventually 1584 we should remove the duplicate information that is not contained in the maps */ 1585 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1586 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1587 1588 /* build local table of row and column ownerships */ 1589 ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1590 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1591 b->cowners = b->rowners + b->size + 2; 1592 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1593 b->rowners[0] = 0; 1594 for (i=2; i<=b->size; i++) { 1595 b->rowners[i] += b->rowners[i-1]; 1596 } 1597 b->rstart = b->rowners[b->rank]; 1598 b->rend = b->rowners[b->rank+1]; 1599 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1600 b->cowners[0] = 0; 1601 for (i=2; i<=b->size; i++) { 1602 b->cowners[i] += b->cowners[i-1]; 1603 } 1604 b->cstart = b->cowners[b->rank]; 1605 b->cend = b->cowners[b->rank+1]; 1606 1607 /* build cache for off array entries formed */ 1608 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1609 b->donotstash = PETSC_FALSE; 1610 b->colmap = 0; 1611 b->garray = 0; 1612 b->roworiented = PETSC_TRUE; 1613 1614 /* stuff used for matrix vector multiply */ 1615 b->lvec = PETSC_NULL; 1616 b->Mvctx = PETSC_NULL; 1617 1618 /* stuff for MatGetRow() */ 1619 b->rowindices = 0; 1620 b->rowvalues = 0; 1621 b->getrowactive = PETSC_FALSE; 1622 1623 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1624 "MatStoreValues_MPIAIJ", 1625 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1626 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1627 "MatRetrieveValues_MPIAIJ", 1628 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1629 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1630 "MatGetDiagonalBlock_MPIAIJ", 1631 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1632 PetscFunctionReturn(0); 1633 } 1634 EXTERN_C_END 1635 1636 #undef __FUNCT__ 1637 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1638 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1639 { 1640 Mat mat; 1641 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1642 int ierr; 1643 1644 PetscFunctionBegin; 1645 *newmat = 0; 1646 ierr = 
MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1647 ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr); 1648 a = (Mat_MPIAIJ*)mat->data; 1649 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1650 mat->factor = matin->factor; 1651 mat->assembled = PETSC_TRUE; 1652 mat->insertmode = NOT_SET_VALUES; 1653 mat->preallocated = PETSC_TRUE; 1654 1655 a->rstart = oldmat->rstart; 1656 a->rend = oldmat->rend; 1657 a->cstart = oldmat->cstart; 1658 a->cend = oldmat->cend; 1659 a->size = oldmat->size; 1660 a->rank = oldmat->rank; 1661 a->donotstash = oldmat->donotstash; 1662 a->roworiented = oldmat->roworiented; 1663 a->rowindices = 0; 1664 a->rowvalues = 0; 1665 a->getrowactive = PETSC_FALSE; 1666 1667 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1668 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1669 if (oldmat->colmap) { 1670 #if defined (PETSC_USE_CTABLE) 1671 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1672 #else 1673 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1674 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1675 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1676 #endif 1677 } else a->colmap = 0; 1678 if (oldmat->garray) { 1679 int len; 1680 len = oldmat->B->n; 1681 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1682 PetscLogObjectMemory(mat,len*sizeof(int)); 1683 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1684 } else a->garray = 0; 1685 1686 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1687 PetscLogObjectParent(mat,a->lvec); 1688 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1689 PetscLogObjectParent(mat,a->Mvctx); 1690 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1691 PetscLogObjectParent(mat,a->A); 1692 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1693 PetscLogObjectParent(mat,a->B); 1694 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1695 *newmat = mat; 1696 PetscFunctionReturn(0); 1697 } 1698 1699 #include "petscsys.h" 1700 1701 EXTERN_C_BEGIN 1702 #undef __FUNCT__ 1703 #define __FUNCT__ "MatLoad_MPIAIJ" 1704 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat) 1705 { 1706 Mat A; 1707 PetscScalar *vals,*svals; 1708 MPI_Comm comm = ((PetscObject)viewer)->comm; 1709 MPI_Status status; 1710 int i,nz,ierr,j,rstart,rend,fd; 1711 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1712 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1713 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1714 1715 PetscFunctionBegin; 1716 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1717 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1718 if (!rank) { 1719 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1720 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1721 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1722 if (header[3] < 0) { 1723 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 1724 } 1725 } 1726 1727 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1728 M = header[1]; N = header[2]; 1729 /* determine ownership of all rows */ 1730 m = M/size + ((M % size) > rank); 1731 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1732 ierr = 
MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1733 rowners[0] = 0; 1734 for (i=2; i<=size; i++) { 1735 rowners[i] += rowners[i-1]; 1736 } 1737 rstart = rowners[rank]; 1738 rend = rowners[rank+1]; 1739 1740 /* distribute row lengths to all processors */ 1741 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr); 1742 offlens = ourlens + (rend-rstart); 1743 if (!rank) { 1744 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 1745 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1746 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 1747 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1748 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1749 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1750 } else { 1751 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1752 } 1753 1754 if (!rank) { 1755 /* calculate the number of nonzeros on each processor */ 1756 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 1757 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 1758 for (i=0; i<size; i++) { 1759 for (j=rowners[i]; j< rowners[i+1]; j++) { 1760 procsnz[i] += rowlengths[j]; 1761 } 1762 } 1763 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1764 1765 /* determine max buffer needed and allocate it */ 1766 maxnz = 0; 1767 for (i=0; i<size; i++) { 1768 maxnz = PetscMax(maxnz,procsnz[i]); 1769 } 1770 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 1771 1772 /* read in my part of the matrix column indices */ 1773 nz = procsnz[0]; 1774 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 1775 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1776 1777 /* read in every one elses and ship off */ 1778 for (i=1; i<size; i++) { 1779 nz = procsnz[i]; 1780 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1781 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 1782 } 1783 ierr = PetscFree(cols);CHKERRQ(ierr); 1784 } else { 1785 /* determine buffer space needed for message */ 1786 nz = 0; 1787 for (i=0; i<m; i++) { 1788 nz += ourlens[i]; 1789 } 1790 ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr); 1791 1792 /* receive message of column indices*/ 1793 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 1794 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 1795 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1796 } 1797 1798 /* determine column ownership if matrix is not square */ 1799 if (N != M) { 1800 n = N/size + ((N % size) > rank); 1801 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1802 cstart = cend - n; 1803 } else { 1804 cstart = rstart; 1805 cend = rend; 1806 n = cend - cstart; 1807 } 1808 1809 /* loop over local rows, determining number of off diagonal entries */ 1810 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 1811 jj = 0; 1812 for (i=0; i<m; i++) { 1813 for (j=0; j<ourlens[i]; j++) { 1814 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 1815 jj++; 1816 } 1817 } 1818 1819 /* create our matrix */ 1820 for (i=0; i<m; i++) { 1821 ourlens[i] -= offlens[i]; 1822 } 1823 ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1824 A = *newmat; 1825 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 1826 for (i=0; i<m; i++) { 1827 ourlens[i] += offlens[i]; 1828 } 1829 1830 if (!rank) { 1831 ierr = 
PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1832 1833 /* read in my part of the matrix numerical values */ 1834 nz = procsnz[0]; 1835 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1836 1837 /* insert into matrix */ 1838 jj = rstart; 1839 smycols = mycols; 1840 svals = vals; 1841 for (i=0; i<m; i++) { 1842 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1843 smycols += ourlens[i]; 1844 svals += ourlens[i]; 1845 jj++; 1846 } 1847 1848 /* read in other processors and ship out */ 1849 for (i=1; i<size; i++) { 1850 nz = procsnz[i]; 1851 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1852 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 1853 } 1854 ierr = PetscFree(procsnz);CHKERRQ(ierr); 1855 } else { 1856 /* receive numeric values */ 1857 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1858 1859 /* receive message of values*/ 1860 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 1861 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1862 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1863 1864 /* insert into matrix */ 1865 jj = rstart; 1866 smycols = mycols; 1867 svals = vals; 1868 for (i=0; i<m; i++) { 1869 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1870 smycols += ourlens[i]; 1871 svals += ourlens[i]; 1872 jj++; 1873 } 1874 } 1875 ierr = PetscFree(ourlens);CHKERRQ(ierr); 1876 ierr = PetscFree(vals);CHKERRQ(ierr); 1877 ierr = PetscFree(mycols);CHKERRQ(ierr); 1878 ierr = PetscFree(rowners);CHKERRQ(ierr); 1879 1880 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1881 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1882 PetscFunctionReturn(0); 1883 } 1884 EXTERN_C_END 1885 1886 #undef __FUNCT__ 1887 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 1888 /* 1889 Not great since it makes two copies of the submatrix, first an SeqAIJ 1890 in local and then by concatenating the local matrices the end result. 
1891 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 1892 */ 1893 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 1894 { 1895 int ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j; 1896 int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend; 1897 Mat *local,M,Mreuse; 1898 PetscScalar *vwork,*aa; 1899 MPI_Comm comm = mat->comm; 1900 Mat_SeqAIJ *aij; 1901 1902 1903 PetscFunctionBegin; 1904 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1905 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1906 1907 if (call == MAT_REUSE_MATRIX) { 1908 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 1909 if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse"); 1910 local = &Mreuse; 1911 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 1912 } else { 1913 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 1914 Mreuse = *local; 1915 ierr = PetscFree(local);CHKERRQ(ierr); 1916 } 1917 1918 /* 1919 m - number of local rows 1920 n - number of columns (same on all processors) 1921 rstart - first row in new global matrix generated 1922 */ 1923 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 1924 if (call == MAT_INITIAL_MATRIX) { 1925 aij = (Mat_SeqAIJ*)(Mreuse)->data; 1926 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix"); 1927 ii = aij->i; 1928 jj = aij->j; 1929 1930 /* 1931 Determine the number of non-zeros in the diagonal and off-diagonal 1932 portions of the matrix in order to do correct preallocation 1933 */ 1934 1935 /* first get start and end of "diagonal" columns */ 1936 if (csize == PETSC_DECIDE) { 1937 nlocal = n/size + ((n % size) > rank); 1938 } else { 1939 nlocal = csize; 1940 } 1941 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1942 rstart = rend - nlocal; 1943 if (rank == size - 1 && rend != n) { 1944 SETERRQ(1,"Local column sizes do not add up to total number of columns"); 1945 } 1946 1947 /* next, compute all the lengths */ 1948 ierr = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr); 1949 olens = dlens + m; 1950 for (i=0; i<m; i++) { 1951 jend = ii[i+1] - ii[i]; 1952 olen = 0; 1953 dlen = 0; 1954 for (j=0; j<jend; j++) { 1955 if (*jj < rstart || *jj >= rend) olen++; 1956 else dlen++; 1957 jj++; 1958 } 1959 olens[i] = olen; 1960 dlens[i] = dlen; 1961 } 1962 ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 1963 ierr = PetscFree(dlens);CHKERRQ(ierr); 1964 } else { 1965 int ml,nl; 1966 1967 M = *newmat; 1968 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 1969 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 1970 ierr = MatZeroEntries(M);CHKERRQ(ierr); 1971 /* 1972 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 1973 rather than the slower MatSetValues(). 
1974 */ 1975 M->was_assembled = PETSC_TRUE; 1976 M->assembled = PETSC_FALSE; 1977 } 1978 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 1979 aij = (Mat_SeqAIJ*)(Mreuse)->data; 1980 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix"); 1981 ii = aij->i; 1982 jj = aij->j; 1983 aa = aij->a; 1984 for (i=0; i<m; i++) { 1985 row = rstart + i; 1986 nz = ii[i+1] - ii[i]; 1987 cwork = jj; jj += nz; 1988 vwork = aa; aa += nz; 1989 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 1990 } 1991 1992 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1993 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1994 *newmat = M; 1995 1996 /* save submatrix used in processor for next request */ 1997 if (call == MAT_INITIAL_MATRIX) { 1998 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 1999 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2000 } 2001 2002 PetscFunctionReturn(0); 2003 } 2004 2005 #undef __FUNCT__ 2006 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2007 /*@C 2008 MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format 2009 (the default parallel PETSc format). For good matrix assembly performance 2010 the user should preallocate the matrix storage by setting the parameters 2011 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2012 performance can be increased by more than a factor of 50. 2013 2014 Collective on MPI_Comm 2015 2016 Input Parameters: 2017 + A - the matrix 2018 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2019 (same value is used for all local rows) 2020 . d_nnz - array containing the number of nonzeros in the various rows of the 2021 DIAGONAL portion of the local submatrix (possibly different for each row) 2022 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2023 The size of this array is equal to the number of local rows, i.e 'm'. 2024 You must leave room for the diagonal entry even if it is zero. 2025 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2026 submatrix (same value is used for all local rows). 2027 - o_nnz - array containing the number of nonzeros in the various rows of the 2028 OFF-DIAGONAL portion of the local submatrix (possibly different for 2029 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2030 structure. The size of this array is equal to the number 2031 of local rows, i.e 'm'. 2032 2033 The AIJ format (also called the Yale sparse matrix format or 2034 compressed row storage), is fully compatible with standard Fortran 77 2035 storage. That is, the stored row and column indices can begin at 2036 either one (as in Fortran) or zero. See the users manual for details. 2037 2038 The user MUST specify either the local or global matrix dimensions 2039 (possibly both). 2040 2041 The parallel matrix is partitioned such that the first m0 rows belong to 2042 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2043 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2044 2045 The DIAGONAL portion of the local submatrix of a processor can be defined 2046 as the submatrix which is obtained by extraction the part corresponding 2047 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2048 first row that belongs to the processor, and r2 is the last row belonging 2049 to the this processor. This is a square mxm matrix. 
   The remaining portion of the local submatrix (m x N) constitutes the
   OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 nonzero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Here the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e. 34, and
   hence the preallocation is perfect.
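
   As an illustration only (this call is a sketch, not taken from an example in
   this file), proc0 in the example above could preallocate its three rows with

.vb
     int d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
   Since d_nnz and o_nnz are given, the scalar d_nz and o_nz arguments (0 here)
   are ignored, as noted above.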
2133 2134 Level: intermediate 2135 2136 .keywords: matrix, aij, compressed row, sparse, parallel 2137 2138 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2139 @*/ 2140 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 2141 { 2142 Mat_MPIAIJ *b; 2143 int ierr,i; 2144 PetscTruth flg2; 2145 2146 PetscFunctionBegin; 2147 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr); 2148 if (!flg2) PetscFunctionReturn(0); 2149 B->preallocated = PETSC_TRUE; 2150 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2151 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2152 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 2153 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 2154 if (d_nnz) { 2155 for (i=0; i<B->m; i++) { 2156 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 2157 } 2158 } 2159 if (o_nnz) { 2160 for (i=0; i<B->m; i++) { 2161 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 2162 } 2163 } 2164 b = (Mat_MPIAIJ*)B->data; 2165 2166 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2167 PetscLogObjectParent(B,b->A); 2168 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2169 PetscLogObjectParent(B,b->B); 2170 2171 PetscFunctionReturn(0); 2172 } 2173 2174 #undef __FUNCT__ 2175 #define __FUNCT__ "MatCreateMPIAIJ" 2176 /*@C 2177 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2178 (the default parallel PETSc format). For good matrix assembly performance 2179 the user should preallocate the matrix storage by setting the parameters 2180 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2181 performance can be increased by more than a factor of 50. 2182 2183 Collective on MPI_Comm 2184 2185 Input Parameters: 2186 + comm - MPI communicator 2187 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2188 This value should be the same as the local size used in creating the 2189 y vector for the matrix-vector product y = Ax. 2190 . n - This value should be the same as the local size used in creating the 2191 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2192 calculated if N is given) For square matrices n is almost always m. 2193 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2194 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2195 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2196 (same value is used for all local rows) 2197 . d_nnz - array containing the number of nonzeros in the various rows of the 2198 DIAGONAL portion of the local submatrix (possibly different for each row) 2199 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2200 The size of this array is equal to the number of local rows, i.e 'm'. 2201 You must leave room for the diagonal entry even if it is zero. 2202 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2203 submatrix (same value is used for all local rows). 2204 - o_nnz - array containing the number of nonzeros in the various rows of the 2205 OFF-DIAGONAL portion of the local submatrix (possibly different for 2206 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2207 structure. 
The size of this array is equal to the number 2208 of local rows, i.e 'm'. 2209 2210 Output Parameter: 2211 . A - the matrix 2212 2213 Notes: 2214 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2215 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2216 storage requirements for this matrix. 2217 2218 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2219 processor than it must be used on all processors that share the object for 2220 that argument. 2221 2222 The AIJ format (also called the Yale sparse matrix format or 2223 compressed row storage), is fully compatible with standard Fortran 77 2224 storage. That is, the stored row and column indices can begin at 2225 either one (as in Fortran) or zero. See the users manual for details. 2226 2227 The user MUST specify either the local or global matrix dimensions 2228 (possibly both). 2229 2230 The parallel matrix is partitioned such that the first m0 rows belong to 2231 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2232 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2233 2234 The DIAGONAL portion of the local submatrix of a processor can be defined 2235 as the submatrix which is obtained by extraction the part corresponding 2236 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2237 first row that belongs to the processor, and r2 is the last row belonging 2238 to the this processor. This is a square mxm matrix. The remaining portion 2239 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2240 2241 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2242 2243 By default, this format uses inodes (identical nodes) when possible. 2244 We search for consecutive rows with the same nonzero structure, thereby 2245 reusing matrix information to achieve increased efficiency. 2246 2247 Options Database Keys: 2248 + -mat_aij_no_inode - Do not use inodes 2249 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2250 - -mat_aij_oneindex - Internally use indexing starting at 1 2251 rather than 0. Note that when calling MatSetValues(), 2252 the user still MUST index entries starting at 0! 2253 2254 2255 Example usage: 2256 2257 Consider the following 8x8 matrix with 34 non-zero values, that is 2258 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2259 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2260 as follows: 2261 2262 .vb 2263 1 2 0 | 0 3 0 | 0 4 2264 Proc0 0 5 6 | 7 0 0 | 8 0 2265 9 0 10 | 11 0 0 | 12 0 2266 ------------------------------------- 2267 13 0 14 | 15 16 17 | 0 0 2268 Proc1 0 18 0 | 19 20 21 | 0 0 2269 0 0 0 | 22 23 0 | 24 0 2270 ------------------------------------- 2271 Proc2 25 26 27 | 0 0 28 | 29 0 2272 30 0 0 | 31 32 33 | 0 34 2273 .ve 2274 2275 This can be represented as a collection of submatrices as: 2276 2277 .vb 2278 A B C 2279 D E F 2280 G H I 2281 .ve 2282 2283 Where the submatrices A,B,C are owned by proc0, D,E,F are 2284 owned by proc1, G,H,I are owned by proc2. 2285 2286 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2287 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2288 The 'M','N' parameters are 8,8, and have the same values on all procs. 2289 2290 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2291 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2292 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e. 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_LOCAL) {
    int        *allcolors,*colors,i;
    ISColoring ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr =
ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2384 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2385 int *colors,i,*larray; 2386 ISColoring ocoloring; 2387 2388 /* set coloring for diagonal portion */ 2389 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2390 for (i=0; i<a->A->n; i++) { 2391 larray[i] = i + a->cstart; 2392 } 2393 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2394 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&colors);CHKERRQ(ierr); 2395 for (i=0; i<a->A->n; i++) { 2396 colors[i] = coloring->colors[larray[i]]; 2397 } 2398 ierr = PetscFree(larray);CHKERRQ(ierr); 2399 ierr = ISColoringCreate(MPI_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2400 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2401 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2402 2403 /* set coloring for off-diagonal portion */ 2404 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2405 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2406 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr); 2407 for (i=0; i<a->B->n; i++) { 2408 colors[i] = coloring->colors[larray[i]]; 2409 } 2410 ierr = PetscFree(larray);CHKERRQ(ierr); 2411 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2412 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2413 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2414 } else { 2415 SETERRQ1(1,"No support ISColoringType %d",coloring->ctype); 2416 } 2417 2418 PetscFunctionReturn(0); 2419 } 2420 2421 #undef __FUNCT__ 2422 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2423 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2424 { 2425 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2426 int ierr; 2427 2428 PetscFunctionBegin; 2429 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2430 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2431 PetscFunctionReturn(0); 2432 } 2433 2434 #undef __FUNCT__ 2435 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2436 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues) 2437 { 2438 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2439 int ierr; 2440 2441 PetscFunctionBegin; 2442 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2443 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2444 PetscFunctionReturn(0); 2445 } 2446
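
/*
   Illustrative sketch only (not part of the original source): how user code
   might create, preallocate, and assemble a parallel AIJ matrix with the
   routines defined in this file.  The 1d Laplacian stencil, the global size
   (100), and the preallocation figures (3 diagonal / 1 off-diagonal nonzeros
   per row) are invented for the example.

     Mat         A;
     int         ierr,i,rstart,rend,cols[3];
     PetscScalar vals[3] = {-1.0,2.0,-1.0};

     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,
                            3,PETSC_NULL,1,PETSC_NULL,&A);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (i=rstart; i<rend; i++) {
       if (i == 0 || i == 99) {
         // boundary rows: diagonal entry only
         ierr = MatSetValues(A,1,&i,1,&i,&vals[1],INSERT_VALUES);CHKERRQ(ierr);
       } else {
         // interior rows: 3 point stencil; columns i-1,i+1 may lie off-processor
         cols[0] = i-1; cols[1] = i; cols[2] = i+1;
         ierr = MatSetValues(A,1,&i,3,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
       }
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/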