/*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
EXTERN int DisAssemble_MPIAIJ(Mat);
EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatPrintHelp_SeqAIJ(Mat);

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is defined this is scalable
  at a slightly higher hash-table cost; without it it is not scalable (each
  processor has an order N integer array) but access is fast.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        n = aij->B->n,i,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
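/*
  Example of the resulting map (illustrative): if the off-diagonal block B
  uses global columns garray = {3,17,42}, then in the array case
  colmap[3] = 1, colmap[17] = 2, colmap[42] = 3; with PETSC_USE_CTABLE both
  key and value are stored shifted by one.  A lookup that yields 0 (or a
  failed PetscTableFind()) means the global column is not present in B.
*/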

#define CHUNKSIZE   15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int         new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
      ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j   = (int*)(new_a + new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int         new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
      ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j   = (int*)(new_a + new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
      for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(b->a);CHKERRQ(ierr); \
      if (!b->singlemalloc) { \
        ierr = PetscFree(b->i);CHKERRQ(ierr); \
        ierr = PetscFree(b->j);CHKERRQ(ierr); \
      } \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = PETSC_TRUE; \
 \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow; \
}
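/*
  The two macros above are hand-inlined versions of MatSetValues_SeqAIJ()
  (compare the commented-out calls in MatSetValues_MPIAIJ() below) for the
  diagonal (A) and off-diagonal (B) blocks: each does a short binary search
  plus a linear scan for the column, overwrites or adds the value if the
  location already exists, and otherwise grows the row storage by CHUNKSIZE
  entries.
*/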

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar value;
  int         ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int         cstart = aij->cstart,cend = aij->cend,row,col;
  PetscTruth  roworiented = aij->roworiented;

  /* Some variables required in the macro */
  Mat         A = aij->A;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)A->data;
  int         *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar *aa = a->a;
  PetscTruth  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat         B = aij->B;
  Mat_SeqAIJ  *b = (Mat_SeqAIJ*)B->data;
  int         *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
  PetscScalar *ba = b->a;

  int         *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
  int         nonew = a->nonew,shift = a->indexshift;
  PetscScalar *ap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int        cstart = aij->cstart,cend = aij->cend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr,nstash,reallocs;
  InsertMode addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}
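/*
  Typical user-level assembly sequence (illustrative sketch; variable names
  are placeholders):

    ierr = MatSetValues(mat,1,&row,1,&col,&value,ADD_VALUES);CHKERRQ(ierr);
    ...
    ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  Off-process entries accumulate in mat->stash during MatSetValues() and are
  shipped to their owners by the MatStashScatter calls in
  MatAssemblyBegin_MPIAIJ() above and MatAssemblyEnd_MPIAIJ() below.
*/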

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  int         i,j,rstart,ncols,n,ierr,flg;
  int         *row,*col,other_disassembled;
  PetscScalar *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
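/*
  MatZeroRows_MPIAIJ() below works in two stages: first each process scans
  the requested global row indices, determines which process owns each one
  (via the rowners table) and sends it there with MPI_Isend/MPI_Irecv; then
  each process zeros the rows it received, calling MatZeroRows() on its
  local diagonal and off-diagonal blocks.
*/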

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
{
  Mat_MPIAIJ  *l = (Mat_MPIAIJ*)A->data;
  int         i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int         *procs,*nprocs,j,idx,nsends,*work,row;
  int         nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int         *rvalues,tag = A->tag,count,base,slen,n,*source;
  int         *lens,imdex,*lrows,*values,rstart=l->rstart;
  MPI_Comm    comm = A->comm;
  MPI_Request *send_waits,*recv_waits;
  MPI_Status  recv_status,*send_status;
  IS          istmp;
  PetscTruth  found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs = nprocs + size;
  ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
  ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nrecvs = work[size+rank];
  nmax   = work[rank];
  ierr = PetscFree(work);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for (i=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called once for each diagonal entry,
     allocating new memory each time, which would mean lots of mallocs and
     slow things down.

     Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
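/*
  Both multiply routines above compute  y = A_loc*x + B_off*x_ghost  (plus
  the additive vector in the Add variant), where A_loc is the local diagonal
  block and B_off the off-diagonal block; the scatter of ghost values into
  lvec is started before the local product so that communication overlaps
  with computation.
*/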

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for this implementation */
  /* but may not always be true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for this implementation */
  /* but may not always be true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
int MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ        *C = (Mat_SeqAIJ*)aij->A->data;
  int               ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,isascii,flg;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
      if (flg) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
                                                  rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
                                                  rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    if (!rank) {
      ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
    ct = cols;
    for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (isascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

/*
    This has to provide several versions.

     2) a) use only local smoothing, updating outer values only once.
        b) local smoothing, updating outer values at each inner iteration.
     3) coloring, updating outer values between colors.
*/
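/*
  Per row, the local sweeps below perform

     x_i <- (1-omega)*x_i + omega*(sum_i + a_ii*x_i)/(a_ii + fshift)

  where sum_i starts from b_i and SPARSEDENSEMDOT() folds the row's
  contributions from the diagonal block (using x) and from the off-diagonal
  block (using the ghost values gathered into lvec) into it; this is the
  classical SOR update restricted to the locally owned rows.
*/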
#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,Vec xx)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ*)matin->data;
  Mat         AA = mat->A,BB = mat->B;
  Mat_SeqAIJ  *A = (Mat_SeqAIJ*)AA->data,*B = (Mat_SeqAIJ *)BB->data;
  PetscScalar *b,*x,*xs,*ls,d,*v,sum;
  int         ierr,*idx,*diag;
  int         n = matin->n,m = matin->m,i,shift = A->indexshift;

  PetscFunctionBegin;
  if (!A->diag) {ierr = MatMarkDiagonal_SeqAIJ(AA);CHKERRQ(ierr);}
  diag = A->diag;

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      if (xx != bb) {
        ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
      } else {
        b = x;
      }
      ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);

      xs = x + shift;
      ls = ls + shift; /* shift by one for index start of 1 */

      /* go down through the rows */
      for (i=0; i<m; i++) {
        n    = A->i[i+1] - A->i[i];
        PetscLogFlops(4*n+3);
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
      /* come up through the rows */
      for (i=m-1; i>-1; i--) {
        n    = A->i[i+1] - A->i[i];
        PetscLogFlops(4*n+3);
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }

      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
      if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);}
      ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      if (xx != bb) {
        ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
      } else {
        b = x;
      }
      ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
      xs = x + shift; /* shift by one for index start of 1 */
      ls = ls + shift;

      for (i=0; i<m; i++) {
        n    = A->i[i+1] - A->i[i];
        PetscLogFlops(4*n+3);
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
      if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);}
      ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      if (xx != bb) {
        ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
      } else {
        b = x;
      }
      ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
      xs = x + shift; /* shift by one for index start of 1 */
      ls = ls + shift;

      for (i=m-1; i>-1; i--) {
        n    = A->i[i+1] - A->i[i];
        PetscLogFlops(4*n+3);
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }

      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
      if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);}
      ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
  Mat        A = mat->A,B = mat->B;
  int        ierr;
  PetscReal  isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  info->columns_local     = (double)matin->N;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
  case MAT_USE_SINGLE_PRECISION_SOLVES:
    PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
    break;
  }
  PetscFunctionReturn(0);
}
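/*
  MatGetRow_MPIAIJ() below assembles one global row by merging the row of
  the diagonal block A (global columns cstart..cend-1) with the row of the
  off-diagonal block B (local columns mapped back to global numbers through
  garray), returning the entries in increasing global column order.
*/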

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int         i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  int         nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  int         *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    int        max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}
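/*
  MatNorm_MPIAIJ() below computes, over the global matrix,

     Frobenius:   ||A||_F   = sqrt(sum_ij |a_ij|^2)   (local sums, MPI_SUM)
     NORM_1:      ||A||_1   = max_j sum_i |a_ij|      (column sums, MPI_SUM, then max)
     NORM_INF:    ||A||_inf = max_i sum_j |a_ij|      (row sums, MPI_MAX)
*/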

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ  *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  int         ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
  PetscReal   sum = 0.0;
  PetscScalar *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      int       *jj,*garray = aij->garray;
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->m; j++) {
        v   = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ  *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ  *Aloc = (Mat_SeqAIJ*)a->A->data;
  int         ierr,shift = Aloc->indexshift;
  int         M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat         B;
  PetscScalar *array;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  }

  ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*)a->A->data;
  m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*)a->B->data;
  m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr);
  ct = cols;
  for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  ierr = PetscFree(ct);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat        a = aij->A,b = aij->B;
  int        ierr,s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPrintHelp_MPIAIJ"
int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPIAIJ"
int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
int MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat        a,b,c,d;
  PetscTruth flg;
  int        ierr;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIAIJ"
int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
  PetscTruth flg;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
  if (str != SAME_NONZERO_PATTERN || !flg) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
       and then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
int MatSetUpPreallocation_MPIAIJ(Mat A)
{
  int ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
#if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*);
#endif

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
       MatMultAdd_MPIAIJ,
       MatMultTranspose_MPIAIJ,
       MatMultTransposeAdd_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
       0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
       MatZeroRows_MPIAIJ,
#if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
       MatLUFactorSymbolic_MPIAIJ_TFS,
#else
       0,
#endif
       0,
       0,
       0,
       MatSetUpPreallocation_MPIAIJ,
       0,
       0,
       0,
       0,
       MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       0,
       MatGetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
       MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       0,
       0,
       MatGetSubMatrix_MPIAIJ,
       MatDestroy_MPIAIJ,
       MatView_MPIAIJ,
       MatGetPetscMaps_Petsc,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatSetColoring_MPIAIJ,
       MatSetValuesAdic_MPIAIJ,
       MatSetValuesAdifor_MPIAIJ
};

/* ----------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIAIJ"
int MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIAIJ"
int MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#include "petscpc.h"
EXTERN_C_BEGIN
EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
int MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  int        ierr,i,size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);

  ierr          = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data       = (void*)b;
  ierr          = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
  ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;
  B->mapping    = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);

  ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  /* build cache for off-array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix-vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPIAIJ",
                                           MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPIAIJ",
                                           MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIAIJ",
                                           MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
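/*
  Example of the ownership tables built above (illustrative): with three
  processes owning 4, 3 and 5 rows, the Allgather plus prefix sum gives
  rowners = {0,4,7,12}, so process r owns rows rowners[r] .. rowners[r+1]-1,
  and rstart/rend are just the two entries for this process; cowners is
  built the same way from the local column counts.
*/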
MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1655 ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr); 1656 a = (Mat_MPIAIJ*)mat->data; 1657 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1658 mat->factor = matin->factor; 1659 mat->assembled = PETSC_TRUE; 1660 mat->insertmode = NOT_SET_VALUES; 1661 mat->preallocated = PETSC_TRUE; 1662 1663 a->rstart = oldmat->rstart; 1664 a->rend = oldmat->rend; 1665 a->cstart = oldmat->cstart; 1666 a->cend = oldmat->cend; 1667 a->size = oldmat->size; 1668 a->rank = oldmat->rank; 1669 a->donotstash = oldmat->donotstash; 1670 a->roworiented = oldmat->roworiented; 1671 a->rowindices = 0; 1672 a->rowvalues = 0; 1673 a->getrowactive = PETSC_FALSE; 1674 1675 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1676 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1677 if (oldmat->colmap) { 1678 #if defined (PETSC_USE_CTABLE) 1679 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1680 #else 1681 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1682 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1683 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1684 #endif 1685 } else a->colmap = 0; 1686 if (oldmat->garray) { 1687 int len; 1688 len = oldmat->B->n; 1689 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1690 PetscLogObjectMemory(mat,len*sizeof(int)); 1691 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1692 } else a->garray = 0; 1693 1694 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1695 PetscLogObjectParent(mat,a->lvec); 1696 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1697 PetscLogObjectParent(mat,a->Mvctx); 1698 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1699 PetscLogObjectParent(mat,a->A); 1700 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1701 PetscLogObjectParent(mat,a->B); 1702 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1703 *newmat = mat; 1704 PetscFunctionReturn(0); 1705 } 1706 1707 #include "petscsys.h" 1708 1709 EXTERN_C_BEGIN 1710 #undef __FUNCT__ 1711 #define __FUNCT__ "MatLoad_MPIAIJ" 1712 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat) 1713 { 1714 Mat A; 1715 PetscScalar *vals,*svals; 1716 MPI_Comm comm = ((PetscObject)viewer)->comm; 1717 MPI_Status status; 1718 int i,nz,ierr,j,rstart,rend,fd; 1719 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1720 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1721 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1722 1723 PetscFunctionBegin; 1724 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1725 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1726 if (!rank) { 1727 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1728 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1729 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1730 if (header[3] < 0) { 1731 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 1732 } 1733 } 1734 1735 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1736 M = header[1]; N = header[2]; 1737 /* determine ownership of all rows */ 1738 m = M/size + ((M % size) > rank); 1739 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1740 ierr = 
MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1741 rowners[0] = 0; 1742 for (i=2; i<=size; i++) { 1743 rowners[i] += rowners[i-1]; 1744 } 1745 rstart = rowners[rank]; 1746 rend = rowners[rank+1]; 1747 1748 /* distribute row lengths to all processors */ 1749 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr); 1750 offlens = ourlens + (rend-rstart); 1751 if (!rank) { 1752 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 1753 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1754 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 1755 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1756 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1757 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1758 } else { 1759 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1760 } 1761 1762 if (!rank) { 1763 /* calculate the number of nonzeros on each processor */ 1764 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 1765 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 1766 for (i=0; i<size; i++) { 1767 for (j=rowners[i]; j< rowners[i+1]; j++) { 1768 procsnz[i] += rowlengths[j]; 1769 } 1770 } 1771 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1772 1773 /* determine max buffer needed and allocate it */ 1774 maxnz = 0; 1775 for (i=0; i<size; i++) { 1776 maxnz = PetscMax(maxnz,procsnz[i]); 1777 } 1778 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 1779 1780 /* read in my part of the matrix column indices */ 1781 nz = procsnz[0]; 1782 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 1783 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1784 1785 /* read in every one elses and ship off */ 1786 for (i=1; i<size; i++) { 1787 nz = procsnz[i]; 1788 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1789 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 1790 } 1791 ierr = PetscFree(cols);CHKERRQ(ierr); 1792 } else { 1793 /* determine buffer space needed for message */ 1794 nz = 0; 1795 for (i=0; i<m; i++) { 1796 nz += ourlens[i]; 1797 } 1798 ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr); 1799 1800 /* receive message of column indices*/ 1801 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 1802 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 1803 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1804 } 1805 1806 /* determine column ownership if matrix is not square */ 1807 if (N != M) { 1808 n = N/size + ((N % size) > rank); 1809 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1810 cstart = cend - n; 1811 } else { 1812 cstart = rstart; 1813 cend = rend; 1814 n = cend - cstart; 1815 } 1816 1817 /* loop over local rows, determining number of off diagonal entries */ 1818 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 1819 jj = 0; 1820 for (i=0; i<m; i++) { 1821 for (j=0; j<ourlens[i]; j++) { 1822 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 1823 jj++; 1824 } 1825 } 1826 1827 /* create our matrix */ 1828 for (i=0; i<m; i++) { 1829 ourlens[i] -= offlens[i]; 1830 } 1831 ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1832 A = *newmat; 1833 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 1834 for (i=0; i<m; i++) { 1835 ourlens[i] += offlens[i]; 1836 } 1837 1838 if (!rank) { 1839 ierr = 
PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1840
1841     /* read in my part of the matrix numerical values */
1842     nz   = procsnz[0];
1843     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1844
1845     /* insert into matrix */
1846     jj      = rstart;
1847     smycols = mycols;
1848     svals   = vals;
1849     for (i=0; i<m; i++) {
1850       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1851       smycols += ourlens[i];
1852       svals   += ourlens[i];
1853       jj++;
1854     }
1855
1856     /* read in the other processors' values and ship them out */
1857     for (i=1; i<size; i++) {
1858       nz   = procsnz[i];
1859       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1860       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1861     }
1862     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1863   } else {
1864     /* allocate space for the numerical values */
1865     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1866
1867     /* receive message of values */
1868     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1869     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1870     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1871
1872     /* insert into matrix */
1873     jj      = rstart;
1874     smycols = mycols;
1875     svals   = vals;
1876     for (i=0; i<m; i++) {
1877       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1878       smycols += ourlens[i];
1879       svals   += ourlens[i];
1880       jj++;
1881     }
1882   }
1883   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1884   ierr = PetscFree(vals);CHKERRQ(ierr);
1885   ierr = PetscFree(mycols);CHKERRQ(ierr);
1886   ierr = PetscFree(rowners);CHKERRQ(ierr);
1887
1888   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1889   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1890   PetscFunctionReturn(0);
1891 }
1892 EXTERN_C_END
1893
1894 #undef __FUNCT__
1895 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
1896 /*
1897     Not great since it makes two copies of the submatrix: first a SeqAIJ submatrix on each process,
1898     and then the end result obtained by concatenating those local matrices.
1899 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 1900 */ 1901 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 1902 { 1903 int ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j; 1904 int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend; 1905 Mat *local,M,Mreuse; 1906 PetscScalar *vwork,*aa; 1907 MPI_Comm comm = mat->comm; 1908 Mat_SeqAIJ *aij; 1909 1910 1911 PetscFunctionBegin; 1912 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1913 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1914 1915 if (call == MAT_REUSE_MATRIX) { 1916 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 1917 if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse"); 1918 local = &Mreuse; 1919 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 1920 } else { 1921 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 1922 Mreuse = *local; 1923 ierr = PetscFree(local);CHKERRQ(ierr); 1924 } 1925 1926 /* 1927 m - number of local rows 1928 n - number of columns (same on all processors) 1929 rstart - first row in new global matrix generated 1930 */ 1931 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 1932 if (call == MAT_INITIAL_MATRIX) { 1933 aij = (Mat_SeqAIJ*)(Mreuse)->data; 1934 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix"); 1935 ii = aij->i; 1936 jj = aij->j; 1937 1938 /* 1939 Determine the number of non-zeros in the diagonal and off-diagonal 1940 portions of the matrix in order to do correct preallocation 1941 */ 1942 1943 /* first get start and end of "diagonal" columns */ 1944 if (csize == PETSC_DECIDE) { 1945 nlocal = n/size + ((n % size) > rank); 1946 } else { 1947 nlocal = csize; 1948 } 1949 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1950 rstart = rend - nlocal; 1951 if (rank == size - 1 && rend != n) { 1952 SETERRQ(1,"Local column sizes do not add up to total number of columns"); 1953 } 1954 1955 /* next, compute all the lengths */ 1956 ierr = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr); 1957 olens = dlens + m; 1958 for (i=0; i<m; i++) { 1959 jend = ii[i+1] - ii[i]; 1960 olen = 0; 1961 dlen = 0; 1962 for (j=0; j<jend; j++) { 1963 if (*jj < rstart || *jj >= rend) olen++; 1964 else dlen++; 1965 jj++; 1966 } 1967 olens[i] = olen; 1968 dlens[i] = dlen; 1969 } 1970 ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 1971 ierr = PetscFree(dlens);CHKERRQ(ierr); 1972 } else { 1973 int ml,nl; 1974 1975 M = *newmat; 1976 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 1977 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 1978 ierr = MatZeroEntries(M);CHKERRQ(ierr); 1979 /* 1980 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 1981 rather than the slower MatSetValues(). 
1982 */ 1983 M->was_assembled = PETSC_TRUE; 1984 M->assembled = PETSC_FALSE; 1985 } 1986 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 1987 aij = (Mat_SeqAIJ*)(Mreuse)->data; 1988 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix"); 1989 ii = aij->i; 1990 jj = aij->j; 1991 aa = aij->a; 1992 for (i=0; i<m; i++) { 1993 row = rstart + i; 1994 nz = ii[i+1] - ii[i]; 1995 cwork = jj; jj += nz; 1996 vwork = aa; aa += nz; 1997 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 1998 } 1999 2000 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2001 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2002 *newmat = M; 2003 2004 /* save submatrix used in processor for next request */ 2005 if (call == MAT_INITIAL_MATRIX) { 2006 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2007 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2008 } 2009 2010 PetscFunctionReturn(0); 2011 } 2012 2013 #undef __FUNCT__ 2014 #define __FUNCT__ "MatMPIAIJSetPreallocation" 2015 /*@C 2016 MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format 2017 (the default parallel PETSc format). For good matrix assembly performance 2018 the user should preallocate the matrix storage by setting the parameters 2019 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2020 performance can be increased by more than a factor of 50. 2021 2022 Collective on MPI_Comm 2023 2024 Input Parameters: 2025 + A - the matrix 2026 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2027 (same value is used for all local rows) 2028 . d_nnz - array containing the number of nonzeros in the various rows of the 2029 DIAGONAL portion of the local submatrix (possibly different for each row) 2030 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2031 The size of this array is equal to the number of local rows, i.e 'm'. 2032 You must leave room for the diagonal entry even if it is zero. 2033 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2034 submatrix (same value is used for all local rows). 2035 - o_nnz - array containing the number of nonzeros in the various rows of the 2036 OFF-DIAGONAL portion of the local submatrix (possibly different for 2037 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2038 structure. The size of this array is equal to the number 2039 of local rows, i.e 'm'. 2040 2041 The AIJ format (also called the Yale sparse matrix format or 2042 compressed row storage), is fully compatible with standard Fortran 77 2043 storage. That is, the stored row and column indices can begin at 2044 either one (as in Fortran) or zero. See the users manual for details. 2045 2046 The user MUST specify either the local or global matrix dimensions 2047 (possibly both). 2048 2049 The parallel matrix is partitioned such that the first m0 rows belong to 2050 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2051 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2052 2053 The DIAGONAL portion of the local submatrix of a processor can be defined 2054 as the submatrix which is obtained by extraction the part corresponding 2055 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2056 first row that belongs to the processor, and r2 is the last row belonging 2057 to the this processor. This is a square mxm matrix. 
The remaining portion
2058    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2059
2060    If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.
2061
2062    By default, this format uses inodes (identical nodes) when possible.
2063    We search for consecutive rows with the same nonzero structure, thereby
2064    reusing matrix information to achieve increased efficiency.
2065
2066    Options Database Keys:
2067 +  -mat_aij_no_inode  - Do not use inodes
2068 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2069 -  -mat_aij_oneindex - Internally use indexing starting at 1
2070         rather than 0.  Note that when calling MatSetValues(),
2071         the user still MUST index entries starting at 0!
2072
2073    Example usage:
2074
2075    Consider the following 8x8 matrix with 34 nonzero values, which is
2076    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2077    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
2078    as follows:
2079
2080 .vb
2081             1  2  0  |  0  3  0  |  0  4
2082    Proc0    0  5  6  |  7  0  0  |  8  0
2083             9  0 10  | 11  0  0  | 12  0
2084    -------------------------------------
2085            13  0 14  | 15 16 17  |  0  0
2086    Proc1    0 18  0  | 19 20 21  |  0  0
2087             0  0  0  | 22 23  0  | 24  0
2088    -------------------------------------
2089    Proc2   25 26 27  |  0  0 28  | 29  0
2090            30  0  0  | 31 32 33  |  0 34
2091 .ve
2092
2093    This can be represented as a collection of submatrices as:
2094
2095 .vb
2096       A B C
2097       D E F
2098       G H I
2099 .ve
2100
2101    The submatrices A,B,C are owned by proc0, D,E,F are
2102    owned by proc1, and G,H,I are owned by proc2.
2103
2104    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2105    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2106    The 'M','N' parameters are 8,8, and have the same values on all procs.
2107
2108    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2109    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2110    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2111    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2112    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
2113    matrix and [DF] as another SeqAIJ matrix.
2114
2115    When the d_nz, o_nz parameters are specified, d_nz storage elements are
2116    allocated for every row of the local DIAGONAL submatrix, and o_nz
2117    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2118    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2119    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2120    In this case, the values of d_nz,o_nz are:
2121 .vb
2122      proc0 : d_nz = 2, o_nz = 2
2123      proc1 : d_nz = 3, o_nz = 2
2124      proc2 : d_nz = 1, o_nz = 4
2125 .ve
2126    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2127    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2128    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2129    34 values.
2130
2131    When the d_nnz, o_nnz parameters are specified, the storage is specified
2132    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2133    In the above case the values for d_nnz,o_nnz are:
2134 .vb
2135      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2136      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2137      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2138 .ve
2139    Here the space allocated is the sum of all the above values, i.e. 34, and
2140    hence the preallocation is perfect.
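   A minimal sketch of the corresponding calls (illustrative only; 'comm' is assumed
   from the calling code, and the arrays shown are the proc1 values from the table
   above, with each process passing its own local sizes and nnz arrays):

.vb
     Mat A;
     int ierr,d_nnz[3] = {3,3,2},o_nnz[3] = {2,1,1};

     ierr = MatCreate(comm,3,3,8,8,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
   Since d_nnz and o_nnz are supplied, the scalar d_nz and o_nz arguments (here 0)
   are ignored.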
2141 2142 Level: intermediate 2143 2144 .keywords: matrix, aij, compressed row, sparse, parallel 2145 2146 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2147 @*/ 2148 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 2149 { 2150 Mat_MPIAIJ *b; 2151 int ierr,i; 2152 PetscTruth flg2; 2153 2154 PetscFunctionBegin; 2155 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr); 2156 if (!flg2) PetscFunctionReturn(0); 2157 B->preallocated = PETSC_TRUE; 2158 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2159 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2160 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 2161 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 2162 if (d_nnz) { 2163 for (i=0; i<B->m; i++) { 2164 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 2165 } 2166 } 2167 if (o_nnz) { 2168 for (i=0; i<B->m; i++) { 2169 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 2170 } 2171 } 2172 b = (Mat_MPIAIJ*)B->data; 2173 2174 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2175 PetscLogObjectParent(B,b->A); 2176 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2177 PetscLogObjectParent(B,b->B); 2178 2179 PetscFunctionReturn(0); 2180 } 2181 2182 #undef __FUNCT__ 2183 #define __FUNCT__ "MatCreateMPIAIJ" 2184 /*@C 2185 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2186 (the default parallel PETSc format). For good matrix assembly performance 2187 the user should preallocate the matrix storage by setting the parameters 2188 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2189 performance can be increased by more than a factor of 50. 2190 2191 Collective on MPI_Comm 2192 2193 Input Parameters: 2194 + comm - MPI communicator 2195 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2196 This value should be the same as the local size used in creating the 2197 y vector for the matrix-vector product y = Ax. 2198 . n - This value should be the same as the local size used in creating the 2199 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2200 calculated if N is given) For square matrices n is almost always m. 2201 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2202 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2203 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2204 (same value is used for all local rows) 2205 . d_nnz - array containing the number of nonzeros in the various rows of the 2206 DIAGONAL portion of the local submatrix (possibly different for each row) 2207 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2208 The size of this array is equal to the number of local rows, i.e 'm'. 2209 You must leave room for the diagonal entry even if it is zero. 2210 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2211 submatrix (same value is used for all local rows). 2212 - o_nnz - array containing the number of nonzeros in the various rows of the 2213 OFF-DIAGONAL portion of the local submatrix (possibly different for 2214 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2215 structure. 
The size of this array is equal to the number
2216    of local rows, i.e., 'm'.
2217
2218    Output Parameter:
2219 .  A - the matrix
2220
2221    Notes:
2222    The m,n,M,N parameters specify the size of the matrix and its partitioning across
2223    processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2224    storage requirements for this matrix.
2225
2226    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2227    processor then it must be used on all processors that share the object for
2228    that argument.
2229
2230    The AIJ format (also called the Yale sparse matrix format or
2231    compressed row storage) is fully compatible with standard Fortran 77
2232    storage.  That is, the stored row and column indices can begin at
2233    either one (as in Fortran) or zero.  See the users manual for details.
2234
2235    The user MUST specify either the local or global matrix dimensions
2236    (possibly both).
2237
2238    The parallel matrix is partitioned such that the first m0 rows belong to
2239    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2240    to process 2, etc., where m0,m1,m2,... are the input parameter 'm' on each process.
2241
2242    The DIAGONAL portion of the local submatrix of a processor can be defined
2243    as the submatrix which is obtained by extracting the part corresponding
2244    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2245    first row that belongs to the processor and r2 is the last row belonging
2246    to this processor. This is a square mxm matrix. The remaining portion
2247    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2248
2249    If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.
2250
2251    By default, this format uses inodes (identical nodes) when possible.
2252    We search for consecutive rows with the same nonzero structure, thereby
2253    reusing matrix information to achieve increased efficiency.
2254
2255    Options Database Keys:
2256 +  -mat_aij_no_inode  - Do not use inodes
2257 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2258 -  -mat_aij_oneindex - Internally use indexing starting at 1
2259         rather than 0.  Note that when calling MatSetValues(),
2260         the user still MUST index entries starting at 0!
2261
2262
2263    Example usage:
2264
2265    Consider the following 8x8 matrix with 34 nonzero values, which is
2266    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2267    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
2268    as follows:
2269
2270 .vb
2271             1  2  0  |  0  3  0  |  0  4
2272    Proc0    0  5  6  |  7  0  0  |  8  0
2273             9  0 10  | 11  0  0  | 12  0
2274    -------------------------------------
2275            13  0 14  | 15 16 17  |  0  0
2276    Proc1    0 18  0  | 19 20 21  |  0  0
2277             0  0  0  | 22 23  0  | 24  0
2278    -------------------------------------
2279    Proc2   25 26 27  |  0  0 28  | 29  0
2280            30  0  0  | 31 32 33  |  0 34
2281 .ve
2282
2283    This can be represented as a collection of submatrices as:
2284
2285 .vb
2286       A B C
2287       D E F
2288       G H I
2289 .ve
2290
2291    The submatrices A,B,C are owned by proc0, D,E,F are
2292    owned by proc1, and G,H,I are owned by proc2.
2293
2294    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2295    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2296    The 'M','N' parameters are 8,8, and have the same values on all procs.
2297
2298    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2299    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2300    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2301    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2302    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
2303    matrix and [DF] as another SeqAIJ matrix.
2304
2305    When the d_nz, o_nz parameters are specified, d_nz storage elements are
2306    allocated for every row of the local DIAGONAL submatrix, and o_nz
2307    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2308    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2309    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2310    In this case, the values of d_nz,o_nz are:
2311 .vb
2312      proc0 : d_nz = 2, o_nz = 2
2313      proc1 : d_nz = 3, o_nz = 2
2314      proc2 : d_nz = 1, o_nz = 4
2315 .ve
2316    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2317    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2318    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2319    34 values.
2320
2321    When the d_nnz, o_nnz parameters are specified, the storage is specified
2322    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2323    In the above case the values for d_nnz,o_nnz are:
2324 .vb
2325      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2326      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2327      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2328 .ve
2329    Here the space allocated is the sum of all the above values, i.e. 34, and
2330    hence the preallocation is perfect.
2331
2332    Level: intermediate
2333
2334 .keywords: matrix, aij, compressed row, sparse, parallel
2335
2336 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2337 @*/
2338 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2339 {
2340   int ierr,size;
2341
2342   PetscFunctionBegin;
2343   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2344   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2345   if (size > 1) {
2346     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2347     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2348   } else {
2349     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2350     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2351   }
2352   PetscFunctionReturn(0);
2353 }
2354
2355 #undef __FUNCT__
2356 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2357 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2358 {
2359   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2360   PetscFunctionBegin;
2361   *Ad     = a->A;
2362   *Ao     = a->B;
2363   *colmap = a->garray;
2364   PetscFunctionReturn(0);
2365 }
2366
2367 #undef __FUNCT__
2368 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2369 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2370 {
2371   int        ierr;
2372   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2373
2374   PetscFunctionBegin;
2375   if (coloring->ctype == IS_COLORING_LOCAL) {
2376     int        *allcolors,*colors,i;
2377     ISColoring ocoloring;
2378
2379     /* set coloring for diagonal portion */
2380     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2381
2382     /* set coloring for off-diagonal portion */
2383     ierr = ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2384     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2385     for (i=0; i<a->B->n; i++) {
2386       colors[i] = allcolors[a->garray[i]];
2387     }
2388     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2389     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2390     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2391     ierr =
ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2392 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2393 int *colors,i,*larray; 2394 ISColoring ocoloring; 2395 2396 /* set coloring for diagonal portion */ 2397 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2398 for (i=0; i<a->A->n; i++) { 2399 larray[i] = i + a->cstart; 2400 } 2401 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2402 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&colors);CHKERRQ(ierr); 2403 for (i=0; i<a->A->n; i++) { 2404 colors[i] = coloring->colors[larray[i]]; 2405 } 2406 ierr = PetscFree(larray);CHKERRQ(ierr); 2407 ierr = ISColoringCreate(MPI_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2408 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2409 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2410 2411 /* set coloring for off-diagonal portion */ 2412 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2413 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2414 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr); 2415 for (i=0; i<a->B->n; i++) { 2416 colors[i] = coloring->colors[larray[i]]; 2417 } 2418 ierr = PetscFree(larray);CHKERRQ(ierr); 2419 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2420 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2421 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2422 } else { 2423 SETERRQ1(1,"No support ISColoringType %d",coloring->ctype); 2424 } 2425 2426 PetscFunctionReturn(0); 2427 } 2428 2429 #undef __FUNCT__ 2430 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2431 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2432 { 2433 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2434 int ierr; 2435 2436 PetscFunctionBegin; 2437 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2438 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2439 PetscFunctionReturn(0); 2440 } 2441 2442 #undef __FUNCT__ 2443 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2444 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues) 2445 { 2446 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2447 int ierr; 2448 2449 PetscFunctionBegin; 2450 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2451 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2452 PetscFunctionReturn(0); 2453 } 2454
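/*
   Illustrative usage sketch (not part of this file's implementation): one way a
   caller might create and assemble an MPIAIJ matrix with the routines defined
   above.  The communicator 'comm', the local row count 'm', the global sizes
   'M','N', and the per-row data cols[], vals[], and nnz_in_row are hypothetical
   placeholders supplied by the application.

      Mat A;
      int ierr,i,rstart,rend;

      ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for (i=rstart; i<rend; i++) {
        (fill cols[] and vals[] with the nnz_in_row nonzeros of global row i)
        ierr = MatSetValues(A,1,&i,nnz_in_row,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      }
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

      (use A, e.g. with MatMult())

      ierr = MatDestroy(A);CHKERRQ(ierr);
*/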