1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/ 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/spops.h" 6 7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat); 8 EXTERN int DisAssemble_MPIAIJ(Mat); 9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode); 10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**); 11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**); 12 EXTERN int MatPrintHelp_SeqAIJ(Mat); 13 EXTERN int MatUseSuperLU_DIST_MPIAIJ(Mat); 14 15 /* 16 Local utility routine that creates a mapping from the global column 17 number to the local number in the off-diagonal part of the local 18 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 19 a slightly higher hash table cost; without it it is not scalable (each processor 20 has an order N integer array but is fast to acess. 21 */ 22 #undef __FUNCT__ 23 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 24 int CreateColmap_MPIAIJ_Private(Mat mat) 25 { 26 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 27 int n = aij->B->n,i,ierr; 28 29 PetscFunctionBegin; 30 #if defined (PETSC_USE_CTABLE) 31 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 32 for (i=0; i<n; i++){ 33 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 34 } 35 #else 36 ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr); 37 PetscLogObjectMemory(mat,mat->N*sizeof(int)); 38 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr); 39 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 40 #endif 41 PetscFunctionReturn(0); 42 } 43 44 #define CHUNKSIZE 15 45 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 46 { \ 47 \ 48 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 49 rmax = aimax[row]; nrow = ailen[row]; \ 50 col1 = col - shift; \ 51 \ 52 low = 0; high = nrow; \ 53 while (high-low > 5) { \ 54 t = (low+high)/2; \ 55 if (rp[t] > col) high = t; \ 56 else low = 
t; \ 57 } \ 58 for (_i=low; _i<high; _i++) { \ 59 if (rp[_i] > col1) break; \ 60 if (rp[_i] == col1) { \ 61 if (addv == ADD_VALUES) ap[_i] += value; \ 62 else ap[_i] = value; \ 63 goto a_noinsert; \ 64 } \ 65 } \ 66 if (nonew == 1) goto a_noinsert; \ 67 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 68 if (nrow >= rmax) { \ 69 /* there is no extra room in row, therefore enlarge */ \ 70 int new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \ 71 PetscScalar *new_a; \ 72 \ 73 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 74 \ 75 /* malloc new storage space */ \ 76 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \ 77 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 78 new_j = (int*)(new_a + new_nz); \ 79 new_i = new_j + new_nz; \ 80 \ 81 /* copy over old data into new slots */ \ 82 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \ 83 for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 84 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 85 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 86 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 87 len*sizeof(int));CHKERRQ(ierr); \ 88 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 89 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 90 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 91 /* free up old matrix storage */ \ 92 \ 93 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 94 if (!a->singlemalloc) { \ 95 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 96 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 97 } \ 98 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 99 a->singlemalloc = PETSC_TRUE; \ 100 \ 101 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 102 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 103 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 104 
a->maxnz += CHUNKSIZE; \ 105 a->reallocs++; \ 106 } \ 107 N = nrow++ - 1; a->nz++; \ 108 /* shift up all the later entries in this row */ \ 109 for (ii=N; ii>=_i; ii--) { \ 110 rp[ii+1] = rp[ii]; \ 111 ap[ii+1] = ap[ii]; \ 112 } \ 113 rp[_i] = col1; \ 114 ap[_i] = value; \ 115 a_noinsert: ; \ 116 ailen[row] = nrow; \ 117 } 118 119 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 120 { \ 121 \ 122 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 123 rmax = bimax[row]; nrow = bilen[row]; \ 124 col1 = col - shift; \ 125 \ 126 low = 0; high = nrow; \ 127 while (high-low > 5) { \ 128 t = (low+high)/2; \ 129 if (rp[t] > col) high = t; \ 130 else low = t; \ 131 } \ 132 for (_i=low; _i<high; _i++) { \ 133 if (rp[_i] > col1) break; \ 134 if (rp[_i] == col1) { \ 135 if (addv == ADD_VALUES) ap[_i] += value; \ 136 else ap[_i] = value; \ 137 goto b_noinsert; \ 138 } \ 139 } \ 140 if (nonew == 1) goto b_noinsert; \ 141 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 142 if (nrow >= rmax) { \ 143 /* there is no extra room in row, therefore enlarge */ \ 144 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 145 PetscScalar *new_a; \ 146 \ 147 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 148 \ 149 /* malloc new storage space */ \ 150 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \ 151 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 152 new_j = (int*)(new_a + new_nz); \ 153 new_i = new_j + new_nz; \ 154 \ 155 /* copy over old data into new slots */ \ 156 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 157 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 158 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 159 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 160 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 161 len*sizeof(int));CHKERRQ(ierr); \ 162 ierr = 
PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 163 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 164 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 165 /* free up old matrix storage */ \ 166 \ 167 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 168 if (!b->singlemalloc) { \ 169 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 170 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 171 } \ 172 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 173 b->singlemalloc = PETSC_TRUE; \ 174 \ 175 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 176 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 177 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 178 b->maxnz += CHUNKSIZE; \ 179 b->reallocs++; \ 180 } \ 181 N = nrow++ - 1; b->nz++; \ 182 /* shift up all the later entries in this row */ \ 183 for (ii=N; ii>=_i; ii--) { \ 184 rp[ii+1] = rp[ii]; \ 185 ap[ii+1] = ap[ii]; \ 186 } \ 187 rp[_i] = col1; \ 188 ap[_i] = value; \ 189 b_noinsert: ; \ 190 bilen[row] = nrow; \ 191 } 192 193 #undef __FUNCT__ 194 #define __FUNCT__ "MatSetValues_MPIAIJ" 195 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 196 { 197 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 198 PetscScalar value; 199 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 200 int cstart = aij->cstart,cend = aij->cend,row,col; 201 PetscTruth roworiented = aij->roworiented; 202 203 /* Some Variables required in the macro */ 204 Mat A = aij->A; 205 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 206 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 207 PetscScalar *aa = a->a; 208 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 209 Mat B = aij->B; 210 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 211 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 212 PetscScalar *ba = b->a; 213 214 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 215 int 
nonew = a->nonew,shift = a->indexshift; 216 PetscScalar *ap; 217 218 PetscFunctionBegin; 219 for (i=0; i<m; i++) { 220 if (im[i] < 0) continue; 221 #if defined(PETSC_USE_BOPT_g) 222 if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 223 #endif 224 if (im[i] >= rstart && im[i] < rend) { 225 row = im[i] - rstart; 226 for (j=0; j<n; j++) { 227 if (in[j] >= cstart && in[j] < cend){ 228 col = in[j] - cstart; 229 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 230 if (ignorezeroentries && value == 0.0) continue; 231 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 232 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 233 } else if (in[j] < 0) continue; 234 #if defined(PETSC_USE_BOPT_g) 235 else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");} 236 #endif 237 else { 238 if (mat->was_assembled) { 239 if (!aij->colmap) { 240 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 241 } 242 #if defined (PETSC_USE_CTABLE) 243 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 244 col--; 245 #else 246 col = aij->colmap[in[j]] - 1; 247 #endif 248 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 249 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 250 col = in[j]; 251 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 252 B = aij->B; 253 b = (Mat_SeqAIJ*)B->data; 254 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 255 ba = b->a; 256 } 257 } else col = in[j]; 258 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 259 if (ignorezeroentries && value == 0.0) continue; 260 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 261 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 262 } 263 } 264 } else { 265 if (!aij->donotstash) { 266 if (roworiented) { 267 if (ignorezeroentries && v[i*n] == 0.0) continue; 268 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 269 } else { 270 if 
(ignorezeroentries && v[i] == 0.0) continue; 271 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 272 } 273 } 274 } 275 } 276 PetscFunctionReturn(0); 277 } 278 279 #undef __FUNCT__ 280 #define __FUNCT__ "MatGetValues_MPIAIJ" 281 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v) 282 { 283 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 284 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 285 int cstart = aij->cstart,cend = aij->cend,row,col; 286 287 PetscFunctionBegin; 288 for (i=0; i<m; i++) { 289 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 290 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 291 if (idxm[i] >= rstart && idxm[i] < rend) { 292 row = idxm[i] - rstart; 293 for (j=0; j<n; j++) { 294 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 295 if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 296 if (idxn[j] >= cstart && idxn[j] < cend){ 297 col = idxn[j] - cstart; 298 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 299 } else { 300 if (!aij->colmap) { 301 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 302 } 303 #if defined (PETSC_USE_CTABLE) 304 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 305 col --; 306 #else 307 col = aij->colmap[idxn[j]] - 1; 308 #endif 309 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 310 else { 311 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 312 } 313 } 314 } 315 } else { 316 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 317 } 318 } 319 PetscFunctionReturn(0); 320 } 321 322 #undef __FUNCT__ 323 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 324 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 325 { 326 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 327 int ierr,nstash,reallocs; 328 InsertMode addv; 329 330 PetscFunctionBegin; 331 if (aij->donotstash) { 332 PetscFunctionReturn(0); 333 } 
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  /* launch the communication of stashed off-processor entries */
  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
/*
   Completes assembly: drains the stash (inserting entries that arrived from
   other processors), assembles the local A and B blocks, and sets up the
   scatter used by MatMult when finishing the first assembly.
*/
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  int         i,j,rstart,ncols,n,ierr,flg;
  int         *row,*col,other_disassembled;
  PetscScalar *val;
  InsertMode  addv = mat->insertmode;
#if defined(PETSC_HAVE_SUPERLUDIST)
  PetscTruth  flag;
#endif

  PetscFunctionBegin;
  if (!aij->donotstash) {
    /* receive stashed entries until every message has been processed */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    /* MPI_PROD: result is 0 iff at least one processor was not assembled */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  /* cached row values (from MatGetRow) are stale after assembly */
  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }
#if defined(PETSC_HAVE_SUPERLUDIST)
  ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); }
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
/* Zero all stored values (nonzero structure is retained) in both blocks. */
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
/*
   Zero the (globally indexed, possibly off-processor) rows in "is"; when diag
   is non-null the diagonal entry of each zeroed row is set to *diag.  Each
   processor routes its row indices to the owning processor with non-blocking
   MPI sends/receives, then zaps the rows it owns locally.
*/
int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
{
  Mat_MPIAIJ  *l = (Mat_MPIAIJ*)A->data;
  int         i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int         *procs,*nprocs,j,idx,nsends,*work,row;
  int         nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int         *rvalues,tag = A->tag,count,base,slen,n,*source;
  int         *lens,imdex,*lrows,*values,rstart=l->rstart;
  MPI_Comm    comm = A->comm;
  MPI_Request *send_waits,*recv_waits;
  MPI_Status  recv_status,*send_status;
  IS          istmp;
  PetscTruth  found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs = nprocs + size;
  ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    /* linear scan over the ownership ranges to find idx's owner */
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  ierr   = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
  ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nrecvs = work[size+rank];   /* number of messages this rank will receive */
  nmax   = work[rank];        /* length of the longest incoming message */
  ierr   = PetscFree(work);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  /* the loop above advanced starts[]; rebuild it for the sends */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for (i=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;  /* convert to local row numbers */
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use seperate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

        Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
/* yy = A*xx: overlap the scatter of off-processor x values with the
   diagonal-block multiply, then add the off-diagonal contribution. */
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
/* zz = A*xx + yy, with the same communication overlap as MatMult. */
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
/* yy = A^T*xx: B^T contributions are scattered (reversed, added) back to
   their owners while the local A^T multiply proceeds. */
int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true.
*/ 630 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 631 PetscFunctionReturn(0); 632 } 633 634 #undef __FUNCT__ 635 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 636 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 637 { 638 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 639 int ierr; 640 641 PetscFunctionBegin; 642 /* do nondiagonal part */ 643 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 644 /* send it on its way */ 645 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 646 /* do local part */ 647 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 648 /* receive remote parts: note this assumes the values are not actually */ 649 /* inserted in yy until the next line, which is true for my implementation*/ 650 /* but is not perhaps always true. */ 651 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 652 PetscFunctionReturn(0); 653 } 654 655 /* 656 This only works correctly for square matrices where the subblock A->A is the 657 diagonal block 658 */ 659 #undef __FUNCT__ 660 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 661 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 662 { 663 int ierr; 664 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 665 666 PetscFunctionBegin; 667 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 668 if (a->rstart != a->cstart || a->rend != a->cend) { 669 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 670 } 671 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 672 PetscFunctionReturn(0); 673 } 674 675 #undef __FUNCT__ 676 #define __FUNCT__ "MatScale_MPIAIJ" 677 int MatScale_MPIAIJ(PetscScalar *aa,Mat A) 678 { 679 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 680 int ierr; 681 682 PetscFunctionBegin; 683 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 684 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 685 PetscFunctionReturn(0); 686 } 687 688 #undef __FUNCT__ 689 #define 
__FUNCT__ "MatDestroy_MPIAIJ" 690 int MatDestroy_MPIAIJ(Mat mat) 691 { 692 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 693 int ierr; 694 695 PetscFunctionBegin; 696 #if defined(PETSC_USE_LOG) 697 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 698 #endif 699 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 700 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 701 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 702 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 703 #if defined (PETSC_USE_CTABLE) 704 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 705 #else 706 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 707 #endif 708 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 709 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 710 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 711 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 712 ierr = PetscFree(aij);CHKERRQ(ierr); 713 PetscFunctionReturn(0); 714 } 715 716 #undef __FUNCT__ 717 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 718 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 719 { 720 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 721 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 722 int ierr,shift = C->indexshift,rank = aij->rank,size = aij->size; 723 PetscTruth isdraw,isascii,flg; 724 PetscViewer sviewer; 725 PetscViewerFormat format; 726 727 PetscFunctionBegin; 728 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 729 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 730 if (isascii) { 731 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 732 if (format == PETSC_VIEWER_ASCII_INFO_LONG) { 733 MatInfo info; 734 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 735 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 736 ierr = 
PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 737 if (flg) { 738 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 739 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 740 } else { 741 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 742 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 743 } 744 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 745 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 746 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 747 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 748 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 749 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 750 PetscFunctionReturn(0); 751 } else if (format == PETSC_VIEWER_ASCII_INFO) { 752 PetscFunctionReturn(0); 753 } 754 } else if (isdraw) { 755 PetscDraw draw; 756 PetscTruth isnull; 757 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 758 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 759 } 760 761 if (size == 1) { 762 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 763 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 764 } else { 765 /* assemble the entire matrix onto first processor. 
*/ 766 Mat A; 767 Mat_SeqAIJ *Aloc; 768 int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 769 PetscScalar *a; 770 771 if (!rank) { 772 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 773 } else { 774 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 775 } 776 PetscLogObjectParent(mat,A); 777 778 /* copy over the A part */ 779 Aloc = (Mat_SeqAIJ*)aij->A->data; 780 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 781 row = aij->rstart; 782 for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;} 783 for (i=0; i<m; i++) { 784 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 785 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 786 } 787 aj = Aloc->j; 788 for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;} 789 790 /* copy over the B part */ 791 Aloc = (Mat_SeqAIJ*)aij->B->data; 792 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 793 row = aij->rstart; 794 ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr); 795 ct = cols; 796 for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];} 797 for (i=0; i<m; i++) { 798 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 799 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 800 } 801 ierr = PetscFree(ct);CHKERRQ(ierr); 802 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 803 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 804 /* 805 Everyone has to call to draw the matrix since the graphics waits are 806 synchronized across all processors that share the PetscDraw object 807 */ 808 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 809 if (!rank) { 810 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 811 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 812 } 813 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 814 ierr = 
MatDestroy(A);CHKERRQ(ierr); 815 } 816 PetscFunctionReturn(0); 817 } 818 819 #undef __FUNCT__ 820 #define __FUNCT__ "MatView_MPIAIJ" 821 int MatView_MPIAIJ(Mat mat,PetscViewer viewer) 822 { 823 int ierr; 824 PetscTruth isascii,isdraw,issocket,isbinary; 825 826 PetscFunctionBegin; 827 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 828 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 829 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 830 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 831 if (isascii || isdraw || isbinary || issocket) { 832 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 833 } else { 834 SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 835 } 836 PetscFunctionReturn(0); 837 } 838 839 840 841 #undef __FUNCT__ 842 #define __FUNCT__ "MatRelax_MPIAIJ" 843 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 844 { 845 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 846 int ierr; 847 Vec bb1; 848 PetscScalar mone=-1.0; 849 850 PetscFunctionBegin; 851 if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 852 853 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 854 855 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 856 if (flag & SOR_ZERO_INITIAL_GUESS) { 857 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 858 its--; 859 } 860 861 while (its--) { 862 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 863 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 864 865 /* update rhs: bb1 = bb - B*x */ 866 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 867 ierr = 
(*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 868 869 /* local sweep */ 870 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,PETSC_NULL,xx); 871 CHKERRQ(ierr); 872 } 873 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 874 if (flag & SOR_ZERO_INITIAL_GUESS) { 875 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 876 its--; 877 } 878 while (its--) { 879 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 880 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 881 882 /* update rhs: bb1 = bb - B*x */ 883 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 884 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 885 886 /* local sweep */ 887 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 888 CHKERRQ(ierr); 889 } 890 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 891 if (flag & SOR_ZERO_INITIAL_GUESS) { 892 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 893 its--; 894 } 895 while (its--) { 896 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 897 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 898 899 /* update rhs: bb1 = bb - B*x */ 900 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 901 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 902 903 /* local sweep */ 904 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 905 CHKERRQ(ierr); 906 } 907 } else { 908 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 909 } 910 911 ierr = VecDestroy(bb1);CHKERRQ(ierr); 912 PetscFunctionReturn(0); 913 } 914 915 #undef __FUNCT__ 916 #define __FUNCT__ "MatGetInfo_MPIAIJ" 917 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 918 { 919 Mat_MPIAIJ *mat = 
(Mat_MPIAIJ*)matin->data; 920 Mat A = mat->A,B = mat->B; 921 int ierr; 922 PetscReal isend[5],irecv[5]; 923 924 PetscFunctionBegin; 925 info->block_size = 1.0; 926 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 927 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 928 isend[3] = info->memory; isend[4] = info->mallocs; 929 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 930 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 931 isend[3] += info->memory; isend[4] += info->mallocs; 932 if (flag == MAT_LOCAL) { 933 info->nz_used = isend[0]; 934 info->nz_allocated = isend[1]; 935 info->nz_unneeded = isend[2]; 936 info->memory = isend[3]; 937 info->mallocs = isend[4]; 938 } else if (flag == MAT_GLOBAL_MAX) { 939 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 940 info->nz_used = irecv[0]; 941 info->nz_allocated = irecv[1]; 942 info->nz_unneeded = irecv[2]; 943 info->memory = irecv[3]; 944 info->mallocs = irecv[4]; 945 } else if (flag == MAT_GLOBAL_SUM) { 946 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 947 info->nz_used = irecv[0]; 948 info->nz_allocated = irecv[1]; 949 info->nz_unneeded = irecv[2]; 950 info->memory = irecv[3]; 951 info->mallocs = irecv[4]; 952 } 953 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 954 info->fill_ratio_needed = 0; 955 info->factor_mallocs = 0; 956 info->rows_global = (double)matin->M; 957 info->columns_global = (double)matin->N; 958 info->rows_local = (double)matin->m; 959 info->columns_local = (double)matin->N; 960 961 PetscFunctionReturn(0); 962 } 963 964 #undef __FUNCT__ 965 #define __FUNCT__ "MatSetOption_MPIAIJ" 966 int MatSetOption_MPIAIJ(Mat A,MatOption op) 967 { 968 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 969 int ierr; 970 971 PetscFunctionBegin; 972 switch (op) { 973 case MAT_NO_NEW_NONZERO_LOCATIONS: 974 case MAT_YES_NEW_NONZERO_LOCATIONS: 975 case 
MAT_COLUMNS_UNSORTED: 976 case MAT_COLUMNS_SORTED: 977 case MAT_NEW_NONZERO_ALLOCATION_ERR: 978 case MAT_KEEP_ZEROED_ROWS: 979 case MAT_NEW_NONZERO_LOCATION_ERR: 980 case MAT_USE_INODES: 981 case MAT_DO_NOT_USE_INODES: 982 case MAT_IGNORE_ZERO_ENTRIES: 983 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 984 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 985 break; 986 case MAT_ROW_ORIENTED: 987 a->roworiented = PETSC_TRUE; 988 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 989 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 990 break; 991 case MAT_ROWS_SORTED: 992 case MAT_ROWS_UNSORTED: 993 case MAT_YES_NEW_DIAGONALS: 994 case MAT_USE_SINGLE_PRECISION_SOLVES: 995 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 996 break; 997 case MAT_COLUMN_ORIENTED: 998 a->roworiented = PETSC_FALSE; 999 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1000 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1001 break; 1002 case MAT_IGNORE_OFF_PROC_ENTRIES: 1003 a->donotstash = PETSC_TRUE; 1004 break; 1005 case MAT_NO_NEW_DIAGONALS: 1006 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1007 break; 1008 default: 1009 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1010 break; 1011 } 1012 PetscFunctionReturn(0); 1013 } 1014 1015 #undef __FUNCT__ 1016 #define __FUNCT__ "MatGetRow_MPIAIJ" 1017 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1018 { 1019 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1020 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1021 int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1022 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1023 int *cmap,*idx_p; 1024 1025 PetscFunctionBegin; 1026 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1027 mat->getrowactive = PETSC_TRUE; 1028 1029 if (!mat->rowvalues && (idx || v)) { 1030 /* 1031 allocate enough space to hold information from the longest row. 
1032 */ 1033 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1034 int max = 1,tmp; 1035 for (i=0; i<matin->m; i++) { 1036 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1037 if (max < tmp) { max = tmp; } 1038 } 1039 ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1040 mat->rowindices = (int*)(mat->rowvalues + max); 1041 } 1042 1043 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1044 lrow = row - rstart; 1045 1046 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1047 if (!v) {pvA = 0; pvB = 0;} 1048 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1049 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1050 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1051 nztot = nzA + nzB; 1052 1053 cmap = mat->garray; 1054 if (v || idx) { 1055 if (nztot) { 1056 /* Sort by increasing column numbers, assuming A and B already sorted */ 1057 int imark = -1; 1058 if (v) { 1059 *v = v_p = mat->rowvalues; 1060 for (i=0; i<nzB; i++) { 1061 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1062 else break; 1063 } 1064 imark = i; 1065 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1066 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1067 } 1068 if (idx) { 1069 *idx = idx_p = mat->rowindices; 1070 if (imark > -1) { 1071 for (i=0; i<imark; i++) { 1072 idx_p[i] = cmap[cworkB[i]]; 1073 } 1074 } else { 1075 for (i=0; i<nzB; i++) { 1076 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1077 else break; 1078 } 1079 imark = i; 1080 } 1081 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1082 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1083 } 1084 } else { 1085 if (idx) *idx = 0; 1086 if (v) *v = 0; 1087 } 1088 } 1089 *nz = nztot; 1090 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1091 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1092 
PetscFunctionReturn(0); 1093 } 1094 1095 #undef __FUNCT__ 1096 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1097 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1098 { 1099 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1100 1101 PetscFunctionBegin; 1102 if (aij->getrowactive == PETSC_FALSE) { 1103 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1104 } 1105 aij->getrowactive = PETSC_FALSE; 1106 PetscFunctionReturn(0); 1107 } 1108 1109 #undef __FUNCT__ 1110 #define __FUNCT__ "MatNorm_MPIAIJ" 1111 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1112 { 1113 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1114 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1115 int ierr,i,j,cstart = aij->cstart,shift = amat->indexshift; 1116 PetscReal sum = 0.0; 1117 PetscScalar *v; 1118 1119 PetscFunctionBegin; 1120 if (aij->size == 1) { 1121 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1122 } else { 1123 if (type == NORM_FROBENIUS) { 1124 v = amat->a; 1125 for (i=0; i<amat->nz; i++) { 1126 #if defined(PETSC_USE_COMPLEX) 1127 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1128 #else 1129 sum += (*v)*(*v); v++; 1130 #endif 1131 } 1132 v = bmat->a; 1133 for (i=0; i<bmat->nz; i++) { 1134 #if defined(PETSC_USE_COMPLEX) 1135 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1136 #else 1137 sum += (*v)*(*v); v++; 1138 #endif 1139 } 1140 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1141 *norm = sqrt(*norm); 1142 } else if (type == NORM_1) { /* max column norm */ 1143 PetscReal *tmp,*tmp2; 1144 int *jj,*garray = aij->garray; 1145 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1146 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1147 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1148 *norm = 0.0; 1149 v = amat->a; jj = amat->j; 1150 for (j=0; j<amat->nz; j++) { 1151 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1152 } 1153 v 
= bmat->a; jj = bmat->j; 1154 for (j=0; j<bmat->nz; j++) { 1155 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1156 } 1157 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1158 for (j=0; j<mat->N; j++) { 1159 if (tmp2[j] > *norm) *norm = tmp2[j]; 1160 } 1161 ierr = PetscFree(tmp);CHKERRQ(ierr); 1162 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1163 } else if (type == NORM_INFINITY) { /* max row norm */ 1164 PetscReal ntemp = 0.0; 1165 for (j=0; j<aij->A->m; j++) { 1166 v = amat->a + amat->i[j] + shift; 1167 sum = 0.0; 1168 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1169 sum += PetscAbsScalar(*v); v++; 1170 } 1171 v = bmat->a + bmat->i[j] + shift; 1172 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1173 sum += PetscAbsScalar(*v); v++; 1174 } 1175 if (sum > ntemp) ntemp = sum; 1176 } 1177 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1178 } else { 1179 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1180 } 1181 } 1182 PetscFunctionReturn(0); 1183 } 1184 1185 #undef __FUNCT__ 1186 #define __FUNCT__ "MatTranspose_MPIAIJ" 1187 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1188 { 1189 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1190 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1191 int ierr,shift = Aloc->indexshift; 1192 int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1193 Mat B; 1194 PetscScalar *array; 1195 1196 PetscFunctionBegin; 1197 if (!matout && M != N) { 1198 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1199 } 1200 1201 ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1202 1203 /* copy over the A part */ 1204 Aloc = (Mat_SeqAIJ*)a->A->data; 1205 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1206 row = a->rstart; 1207 for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;} 1208 for (i=0; i<m; i++) { 1209 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1210 row++; array += ai[i+1]-ai[i]; aj += 
ai[i+1]-ai[i]; 1211 } 1212 aj = Aloc->j; 1213 for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;} 1214 1215 /* copy over the B part */ 1216 Aloc = (Mat_SeqAIJ*)a->B->data; 1217 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1218 row = a->rstart; 1219 ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr); 1220 ct = cols; 1221 for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];} 1222 for (i=0; i<m; i++) { 1223 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1224 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1225 } 1226 ierr = PetscFree(ct);CHKERRQ(ierr); 1227 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1228 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1229 if (matout) { 1230 *matout = B; 1231 } else { 1232 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1233 } 1234 PetscFunctionReturn(0); 1235 } 1236 1237 #undef __FUNCT__ 1238 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1239 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1240 { 1241 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1242 Mat a = aij->A,b = aij->B; 1243 int ierr,s1,s2,s3; 1244 1245 PetscFunctionBegin; 1246 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1247 if (rr) { 1248 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1249 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1250 /* Overlap communication with computation. 
*/ 1251 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1252 } 1253 if (ll) { 1254 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1255 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1256 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1257 } 1258 /* scale the diagonal block */ 1259 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1260 1261 if (rr) { 1262 /* Do a scatter end and then right scale the off-diagonal block */ 1263 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1264 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1265 } 1266 1267 PetscFunctionReturn(0); 1268 } 1269 1270 1271 #undef __FUNCT__ 1272 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1273 int MatPrintHelp_MPIAIJ(Mat A) 1274 { 1275 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1276 int ierr; 1277 1278 PetscFunctionBegin; 1279 if (!a->rank) { 1280 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1281 } 1282 PetscFunctionReturn(0); 1283 } 1284 1285 #undef __FUNCT__ 1286 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1287 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1288 { 1289 PetscFunctionBegin; 1290 *bs = 1; 1291 PetscFunctionReturn(0); 1292 } 1293 #undef __FUNCT__ 1294 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1295 int MatSetUnfactored_MPIAIJ(Mat A) 1296 { 1297 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1298 int ierr; 1299 1300 PetscFunctionBegin; 1301 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1302 PetscFunctionReturn(0); 1303 } 1304 1305 #undef __FUNCT__ 1306 #define __FUNCT__ "MatEqual_MPIAIJ" 1307 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1308 { 1309 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1310 Mat a,b,c,d; 1311 PetscTruth flg; 1312 int ierr; 1313 1314 PetscFunctionBegin; 1315 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1316 if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type"); 1317 a = 
matA->A; b = matA->B; 1318 c = matB->A; d = matB->B; 1319 1320 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1321 if (flg == PETSC_TRUE) { 1322 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1323 } 1324 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1325 PetscFunctionReturn(0); 1326 } 1327 1328 #undef __FUNCT__ 1329 #define __FUNCT__ "MatCopy_MPIAIJ" 1330 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1331 { 1332 int ierr; 1333 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1334 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1335 PetscTruth flg; 1336 1337 PetscFunctionBegin; 1338 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1339 if (str != SAME_NONZERO_PATTERN || !flg) { 1340 /* because of the column compression in the off-processor part of the matrix a->B, 1341 the number of columns in a->B and b->B may be different, hence we cannot call 1342 the MatCopy() directly on the two parts. If need be, we can provide a more 1343 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1344 then copying the submatrices */ 1345 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1346 } else { 1347 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1348 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1349 } 1350 PetscFunctionReturn(0); 1351 } 1352 1353 #undef __FUNCT__ 1354 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1355 int MatSetUpPreallocation_MPIAIJ(Mat A) 1356 { 1357 int ierr; 1358 1359 PetscFunctionBegin; 1360 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1361 PetscFunctionReturn(0); 1362 } 1363 1364 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1365 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int); 1366 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1367 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **); 1368 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *); 1369 #if !defined(PETSC_USE_COMPLEX) && 
!defined(PETSC_USE_SINGLE) 1370 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*); 1371 #endif 1372 1373 #include "petscblaslapack.h" 1374 1375 #undef __FUNCT__ 1376 #define __FUNCT__ "MatAXPY_MPIAIJ" 1377 int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str) 1378 { 1379 int ierr,one; 1380 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1381 Mat_SeqAIJ *x,*y; 1382 1383 PetscFunctionBegin; 1384 if (str == SAME_NONZERO_PATTERN) { 1385 x = (Mat_SeqAIJ *)xx->A->data; 1386 y = (Mat_SeqAIJ *)yy->A->data; 1387 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1388 x = (Mat_SeqAIJ *)xx->B->data; 1389 y = (Mat_SeqAIJ *)yy->B->data; 1390 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1391 } else { 1392 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1393 } 1394 PetscFunctionReturn(0); 1395 } 1396 1397 /* -------------------------------------------------------------------*/ 1398 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1399 MatGetRow_MPIAIJ, 1400 MatRestoreRow_MPIAIJ, 1401 MatMult_MPIAIJ, 1402 MatMultAdd_MPIAIJ, 1403 MatMultTranspose_MPIAIJ, 1404 MatMultTransposeAdd_MPIAIJ, 1405 0, 1406 0, 1407 0, 1408 0, 1409 0, 1410 0, 1411 MatRelax_MPIAIJ, 1412 MatTranspose_MPIAIJ, 1413 MatGetInfo_MPIAIJ, 1414 MatEqual_MPIAIJ, 1415 MatGetDiagonal_MPIAIJ, 1416 MatDiagonalScale_MPIAIJ, 1417 MatNorm_MPIAIJ, 1418 MatAssemblyBegin_MPIAIJ, 1419 MatAssemblyEnd_MPIAIJ, 1420 0, 1421 MatSetOption_MPIAIJ, 1422 MatZeroEntries_MPIAIJ, 1423 MatZeroRows_MPIAIJ, 1424 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1425 MatLUFactorSymbolic_MPIAIJ_TFS, 1426 #else 1427 0, 1428 #endif 1429 0, 1430 0, 1431 0, 1432 MatSetUpPreallocation_MPIAIJ, 1433 0, 1434 0, 1435 0, 1436 0, 1437 MatDuplicate_MPIAIJ, 1438 0, 1439 0, 1440 0, 1441 0, 1442 MatAXPY_MPIAIJ, 1443 MatGetSubMatrices_MPIAIJ, 1444 MatIncreaseOverlap_MPIAIJ, 1445 MatGetValues_MPIAIJ, 1446 MatCopy_MPIAIJ, 1447 MatPrintHelp_MPIAIJ, 1448 MatScale_MPIAIJ, 1449 0, 1450 0, 1451 0, 1452 
MatGetBlockSize_MPIAIJ, 1453 0, 1454 0, 1455 0, 1456 0, 1457 MatFDColoringCreate_MPIAIJ, 1458 0, 1459 MatSetUnfactored_MPIAIJ, 1460 0, 1461 0, 1462 MatGetSubMatrix_MPIAIJ, 1463 MatDestroy_MPIAIJ, 1464 MatView_MPIAIJ, 1465 MatGetPetscMaps_Petsc, 1466 0, 1467 0, 1468 0, 1469 0, 1470 0, 1471 0, 1472 0, 1473 0, 1474 MatSetColoring_MPIAIJ, 1475 MatSetValuesAdic_MPIAIJ, 1476 MatSetValuesAdifor_MPIAIJ 1477 }; 1478 1479 /* ----------------------------------------------------------------------------------------*/ 1480 1481 EXTERN_C_BEGIN 1482 #undef __FUNCT__ 1483 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1484 int MatStoreValues_MPIAIJ(Mat mat) 1485 { 1486 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1487 int ierr; 1488 1489 PetscFunctionBegin; 1490 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1491 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1492 PetscFunctionReturn(0); 1493 } 1494 EXTERN_C_END 1495 1496 EXTERN_C_BEGIN 1497 #undef __FUNCT__ 1498 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1499 int MatRetrieveValues_MPIAIJ(Mat mat) 1500 { 1501 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1502 int ierr; 1503 1504 PetscFunctionBegin; 1505 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1506 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1507 PetscFunctionReturn(0); 1508 } 1509 EXTERN_C_END 1510 1511 #include "petscpc.h" 1512 EXTERN_C_BEGIN 1513 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *); 1514 EXTERN_C_END 1515 1516 EXTERN_C_BEGIN 1517 #undef __FUNCT__ 1518 #define __FUNCT__ "MatCreate_MPIAIJ" 1519 int MatCreate_MPIAIJ(Mat B) 1520 { 1521 Mat_MPIAIJ *b; 1522 int ierr,i,size; 1523 #if defined(PETSC_HAVE_SUPERLUDIST) 1524 PetscTruth flg; 1525 #endif 1526 1527 1528 PetscFunctionBegin; 1529 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1530 1531 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1532 B->data = (void*)b; 1533 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1534 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct 
_MatOps));CHKERRQ(ierr); 1535 B->factor = 0; 1536 B->assembled = PETSC_FALSE; 1537 B->mapping = 0; 1538 1539 B->insertmode = NOT_SET_VALUES; 1540 b->size = size; 1541 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1542 1543 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1544 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1545 1546 /* the information in the maps duplicates the information computed below, eventually 1547 we should remove the duplicate information that is not contained in the maps */ 1548 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1549 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1550 1551 /* build local table of row and column ownerships */ 1552 ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1553 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1554 b->cowners = b->rowners + b->size + 2; 1555 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1556 b->rowners[0] = 0; 1557 for (i=2; i<=b->size; i++) { 1558 b->rowners[i] += b->rowners[i-1]; 1559 } 1560 b->rstart = b->rowners[b->rank]; 1561 b->rend = b->rowners[b->rank+1]; 1562 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1563 b->cowners[0] = 0; 1564 for (i=2; i<=b->size; i++) { 1565 b->cowners[i] += b->cowners[i-1]; 1566 } 1567 b->cstart = b->cowners[b->rank]; 1568 b->cend = b->cowners[b->rank+1]; 1569 1570 /* build cache for off array entries formed */ 1571 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1572 b->donotstash = PETSC_FALSE; 1573 b->colmap = 0; 1574 b->garray = 0; 1575 b->roworiented = PETSC_TRUE; 1576 1577 /* stuff used for matrix vector multiply */ 1578 b->lvec = PETSC_NULL; 1579 b->Mvctx = PETSC_NULL; 1580 1581 /* stuff for MatGetRow() */ 1582 b->rowindices = 0; 1583 b->rowvalues = 0; 1584 b->getrowactive = PETSC_FALSE; 1585 1586 #if 
defined(PETSC_HAVE_SUPERLUDIST) 1587 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_superlu_dist",&flg);CHKERRQ(ierr); 1588 if (flg) { ierr = MatUseSuperLU_DIST_MPIAIJ(B);CHKERRQ(ierr); } 1589 #endif 1590 1591 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1592 "MatStoreValues_MPIAIJ", 1593 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1594 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1595 "MatRetrieveValues_MPIAIJ", 1596 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1597 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1598 "MatGetDiagonalBlock_MPIAIJ", 1599 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1600 1601 PetscFunctionReturn(0); 1602 } 1603 EXTERN_C_END 1604 1605 #undef __FUNCT__ 1606 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1607 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1608 { 1609 Mat mat; 1610 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1611 int ierr; 1612 1613 PetscFunctionBegin; 1614 *newmat = 0; 1615 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1616 ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr); 1617 a = (Mat_MPIAIJ*)mat->data; 1618 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1619 mat->factor = matin->factor; 1620 mat->assembled = PETSC_TRUE; 1621 mat->insertmode = NOT_SET_VALUES; 1622 mat->preallocated = PETSC_TRUE; 1623 1624 a->rstart = oldmat->rstart; 1625 a->rend = oldmat->rend; 1626 a->cstart = oldmat->cstart; 1627 a->cend = oldmat->cend; 1628 a->size = oldmat->size; 1629 a->rank = oldmat->rank; 1630 a->donotstash = oldmat->donotstash; 1631 a->roworiented = oldmat->roworiented; 1632 a->rowindices = 0; 1633 a->rowvalues = 0; 1634 a->getrowactive = PETSC_FALSE; 1635 1636 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1637 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1638 if 
(oldmat->colmap) { 1639 #if defined (PETSC_USE_CTABLE) 1640 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1641 #else 1642 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1643 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1644 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1645 #endif 1646 } else a->colmap = 0; 1647 if (oldmat->garray) { 1648 int len; 1649 len = oldmat->B->n; 1650 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1651 PetscLogObjectMemory(mat,len*sizeof(int)); 1652 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1653 } else a->garray = 0; 1654 1655 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1656 PetscLogObjectParent(mat,a->lvec); 1657 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1658 PetscLogObjectParent(mat,a->Mvctx); 1659 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1660 PetscLogObjectParent(mat,a->A); 1661 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1662 PetscLogObjectParent(mat,a->B); 1663 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1664 *newmat = mat; 1665 PetscFunctionReturn(0); 1666 } 1667 1668 #include "petscsys.h" 1669 1670 EXTERN_C_BEGIN 1671 #undef __FUNCT__ 1672 #define __FUNCT__ "MatLoad_MPIAIJ" 1673 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat) 1674 { 1675 Mat A; 1676 PetscScalar *vals,*svals; 1677 MPI_Comm comm = ((PetscObject)viewer)->comm; 1678 MPI_Status status; 1679 int i,nz,ierr,j,rstart,rend,fd; 1680 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1681 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1682 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1683 1684 PetscFunctionBegin; 1685 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1686 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1687 if (!rank) { 1688 ierr = 
PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1689 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1690 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1691 if (header[3] < 0) { 1692 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 1693 } 1694 } 1695 1696 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1697 M = header[1]; N = header[2]; 1698 /* determine ownership of all rows */ 1699 m = M/size + ((M % size) > rank); 1700 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1701 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1702 rowners[0] = 0; 1703 for (i=2; i<=size; i++) { 1704 rowners[i] += rowners[i-1]; 1705 } 1706 rstart = rowners[rank]; 1707 rend = rowners[rank+1]; 1708 1709 /* distribute row lengths to all processors */ 1710 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr); 1711 offlens = ourlens + (rend-rstart); 1712 if (!rank) { 1713 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 1714 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1715 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 1716 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1717 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1718 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1719 } else { 1720 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1721 } 1722 1723 if (!rank) { 1724 /* calculate the number of nonzeros on each processor */ 1725 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 1726 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 1727 for (i=0; i<size; i++) { 1728 for (j=rowners[i]; j< rowners[i+1]; j++) { 1729 procsnz[i] += rowlengths[j]; 1730 } 1731 } 1732 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1733 1734 /* 
determine max buffer needed and allocate it */ 1735 maxnz = 0; 1736 for (i=0; i<size; i++) { 1737 maxnz = PetscMax(maxnz,procsnz[i]); 1738 } 1739 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 1740 1741 /* read in my part of the matrix column indices */ 1742 nz = procsnz[0]; 1743 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 1744 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1745 1746 /* read in every one elses and ship off */ 1747 for (i=1; i<size; i++) { 1748 nz = procsnz[i]; 1749 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1750 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 1751 } 1752 ierr = PetscFree(cols);CHKERRQ(ierr); 1753 } else { 1754 /* determine buffer space needed for message */ 1755 nz = 0; 1756 for (i=0; i<m; i++) { 1757 nz += ourlens[i]; 1758 } 1759 ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr); 1760 1761 /* receive message of column indices*/ 1762 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 1763 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 1764 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1765 } 1766 1767 /* determine column ownership if matrix is not square */ 1768 if (N != M) { 1769 n = N/size + ((N % size) > rank); 1770 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1771 cstart = cend - n; 1772 } else { 1773 cstart = rstart; 1774 cend = rend; 1775 n = cend - cstart; 1776 } 1777 1778 /* loop over local rows, determining number of off diagonal entries */ 1779 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 1780 jj = 0; 1781 for (i=0; i<m; i++) { 1782 for (j=0; j<ourlens[i]; j++) { 1783 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 1784 jj++; 1785 } 1786 } 1787 1788 /* create our matrix */ 1789 for (i=0; i<m; i++) { 1790 ourlens[i] -= offlens[i]; 1791 } 1792 ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1793 A = 
*newmat; 1794 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 1795 for (i=0; i<m; i++) { 1796 ourlens[i] += offlens[i]; 1797 } 1798 1799 if (!rank) { 1800 ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1801 1802 /* read in my part of the matrix numerical values */ 1803 nz = procsnz[0]; 1804 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1805 1806 /* insert into matrix */ 1807 jj = rstart; 1808 smycols = mycols; 1809 svals = vals; 1810 for (i=0; i<m; i++) { 1811 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1812 smycols += ourlens[i]; 1813 svals += ourlens[i]; 1814 jj++; 1815 } 1816 1817 /* read in other processors and ship out */ 1818 for (i=1; i<size; i++) { 1819 nz = procsnz[i]; 1820 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1821 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 1822 } 1823 ierr = PetscFree(procsnz);CHKERRQ(ierr); 1824 } else { 1825 /* receive numeric values */ 1826 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 1827 1828 /* receive message of values*/ 1829 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 1830 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1831 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1832 1833 /* insert into matrix */ 1834 jj = rstart; 1835 smycols = mycols; 1836 svals = vals; 1837 for (i=0; i<m; i++) { 1838 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1839 smycols += ourlens[i]; 1840 svals += ourlens[i]; 1841 jj++; 1842 } 1843 } 1844 ierr = PetscFree(ourlens);CHKERRQ(ierr); 1845 ierr = PetscFree(vals);CHKERRQ(ierr); 1846 ierr = PetscFree(mycols);CHKERRQ(ierr); 1847 ierr = PetscFree(rowners);CHKERRQ(ierr); 1848 1849 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1850 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1851 PetscFunctionReturn(0); 
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
/*
    Extracts the parallel submatrix defined by isrow/iscol.

    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
{
  int         ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  int         *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend;
  Mat         *local,M,Mreuse;
  PetscScalar *vwork,*aa;
  MPI_Comm    comm = mat->comm;
  Mat_SeqAIJ  *aij;


  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call ==  MAT_REUSE_MATRIX) {
    /* the sequential submatrix from the previous call was composed onto *newmat;
       retrieve it and refill it in place */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    /* MatGetSubMatrices() allocated the one-element array of matrices; only the array
       itself is freed here, the matrix Mreuse is kept */
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      nlocal = n/size + ((n % size) > rank);
    } else {
      nlocal = csize;
    }
    /* prefix sum of the local column counts gives this process's column range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ(1,"Local column sizes do not add up to total number of columns");
    }

    /* next, compute all the lengths; one malloc holds both dlens and olens */
    ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        /* columns outside [rstart,rend) belong to the off-diagonal block */
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    int ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij = (Mat_SeqAIJ*)(Mreuse)->data;
  if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  /* copy the sequential submatrix row by row into the parallel matrix;
     jj and aa walk forward through the CSR column/value arrays */
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request (enables MAT_REUSE_MATRIX);
     the dereference hands ownership of Mreuse to M's composed-object list */
  if (call ==  MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
1992 The size of this array is equal to the number of local rows, i.e 'm'. 1993 You must leave room for the diagonal entry even if it is zero. 1994 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 1995 submatrix (same value is used for all local rows). 1996 - o_nnz - array containing the number of nonzeros in the various rows of the 1997 OFF-DIAGONAL portion of the local submatrix (possibly different for 1998 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 1999 structure. The size of this array is equal to the number 2000 of local rows, i.e 'm'. 2001 2002 The AIJ format (also called the Yale sparse matrix format or 2003 compressed row storage), is fully compatible with standard Fortran 77 2004 storage. That is, the stored row and column indices can begin at 2005 either one (as in Fortran) or zero. See the users manual for details. 2006 2007 The user MUST specify either the local or global matrix dimensions 2008 (possibly both). 2009 2010 The parallel matrix is partitioned such that the first m0 rows belong to 2011 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2012 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2013 2014 The DIAGONAL portion of the local submatrix of a processor can be defined 2015 as the submatrix which is obtained by extraction the part corresponding 2016 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2017 first row that belongs to the processor, and r2 is the last row belonging 2018 to the this processor. This is a square mxm matrix. The remaining portion 2019 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2020 2021 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2022 2023 By default, this format uses inodes (identical nodes) when possible. 2024 We search for consecutive rows with the same nonzero structure, thereby 2025 reusing matrix information to achieve increased efficiency. 
2026 2027 Options Database Keys: 2028 + -mat_aij_no_inode - Do not use inodes 2029 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2030 - -mat_aij_oneindex - Internally use indexing starting at 1 2031 rather than 0. Note that when calling MatSetValues(), 2032 the user still MUST index entries starting at 0! 2033 2034 Example usage: 2035 2036 Consider the following 8x8 matrix with 34 non-zero values, that is 2037 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2038 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2039 as follows: 2040 2041 .vb 2042 1 2 0 | 0 3 0 | 0 4 2043 Proc0 0 5 6 | 7 0 0 | 8 0 2044 9 0 10 | 11 0 0 | 12 0 2045 ------------------------------------- 2046 13 0 14 | 15 16 17 | 0 0 2047 Proc1 0 18 0 | 19 20 21 | 0 0 2048 0 0 0 | 22 23 0 | 24 0 2049 ------------------------------------- 2050 Proc2 25 26 27 | 0 0 28 | 29 0 2051 30 0 0 | 31 32 33 | 0 34 2052 .ve 2053 2054 This can be represented as a collection of submatrices as: 2055 2056 .vb 2057 A B C 2058 D E F 2059 G H I 2060 .ve 2061 2062 Where the submatrices A,B,C are owned by proc0, D,E,F are 2063 owned by proc1, G,H,I are owned by proc2. 2064 2065 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2066 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2067 The 'M','N' parameters are 8,8, and have the same values on all procs. 2068 2069 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2070 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2071 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2072 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2073 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2074 matrix, ans [DF] as another SeqAIJ matrix. 
   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.
2102 2103 Level: intermediate 2104 2105 .keywords: matrix, aij, compressed row, sparse, parallel 2106 2107 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2108 @*/ 2109 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 2110 { 2111 Mat_MPIAIJ *b; 2112 int ierr,i; 2113 PetscTruth flg2; 2114 2115 PetscFunctionBegin; 2116 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr); 2117 if (!flg2) PetscFunctionReturn(0); 2118 B->preallocated = PETSC_TRUE; 2119 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2120 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2121 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 2122 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 2123 if (d_nnz) { 2124 for (i=0; i<B->m; i++) { 2125 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 2126 } 2127 } 2128 if (o_nnz) { 2129 for (i=0; i<B->m; i++) { 2130 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 2131 } 2132 } 2133 b = (Mat_MPIAIJ*)B->data; 2134 2135 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2136 PetscLogObjectParent(B,b->A); 2137 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2138 PetscLogObjectParent(B,b->B); 2139 2140 PetscFunctionReturn(0); 2141 } 2142 2143 #undef __FUNCT__ 2144 #define __FUNCT__ "MatCreateMPIAIJ" 2145 /*@C 2146 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2147 (the default parallel PETSc format). For good matrix assembly performance 2148 the user should preallocate the matrix storage by setting the parameters 2149 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2150 performance can be increased by more than a factor of 50. 
2151 2152 Collective on MPI_Comm 2153 2154 Input Parameters: 2155 + comm - MPI communicator 2156 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2157 This value should be the same as the local size used in creating the 2158 y vector for the matrix-vector product y = Ax. 2159 . n - This value should be the same as the local size used in creating the 2160 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2161 calculated if N is given) For square matrices n is almost always m. 2162 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2163 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2164 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2165 (same value is used for all local rows) 2166 . d_nnz - array containing the number of nonzeros in the various rows of the 2167 DIAGONAL portion of the local submatrix (possibly different for each row) 2168 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2169 The size of this array is equal to the number of local rows, i.e 'm'. 2170 You must leave room for the diagonal entry even if it is zero. 2171 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2172 submatrix (same value is used for all local rows). 2173 - o_nnz - array containing the number of nonzeros in the various rows of the 2174 OFF-DIAGONAL portion of the local submatrix (possibly different for 2175 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2176 structure. The size of this array is equal to the number 2177 of local rows, i.e 'm'. 2178 2179 Output Parameter: 2180 . A - the matrix 2181 2182 Notes: 2183 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2184 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2185 storage requirements for this matrix. 
2186 2187 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2188 processor than it must be used on all processors that share the object for 2189 that argument. 2190 2191 The AIJ format (also called the Yale sparse matrix format or 2192 compressed row storage), is fully compatible with standard Fortran 77 2193 storage. That is, the stored row and column indices can begin at 2194 either one (as in Fortran) or zero. See the users manual for details. 2195 2196 The user MUST specify either the local or global matrix dimensions 2197 (possibly both). 2198 2199 The parallel matrix is partitioned such that the first m0 rows belong to 2200 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2201 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2202 2203 The DIAGONAL portion of the local submatrix of a processor can be defined 2204 as the submatrix which is obtained by extraction the part corresponding 2205 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2206 first row that belongs to the processor, and r2 is the last row belonging 2207 to the this processor. This is a square mxm matrix. The remaining portion 2208 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2209 2210 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2211 2212 By default, this format uses inodes (identical nodes) when possible. 2213 We search for consecutive rows with the same nonzero structure, thereby 2214 reusing matrix information to achieve increased efficiency. 2215 2216 Options Database Keys: 2217 + -mat_aij_no_inode - Do not use inodes 2218 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2219 - -mat_aij_oneindex - Internally use indexing starting at 1 2220 rather than 0. Note that when calling MatSetValues(), 2221 the user still MUST index entries starting at 0! 


   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2271 In this case, the values of d_nz,o_nz are: 2272 .vb 2273 proc0 : dnz = 2, o_nz = 2 2274 proc1 : dnz = 3, o_nz = 2 2275 proc2 : dnz = 1, o_nz = 4 2276 .ve 2277 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2278 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2279 for proc3. i.e we are using 12+15+10=37 storage locations to store 2280 34 values. 2281 2282 When d_nnz, o_nnz parameters are specified, the storage is specified 2283 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2284 In the above case the values for d_nnz,o_nnz are: 2285 .vb 2286 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2287 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2288 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2289 .ve 2290 Here the space allocated is sum of all the above values i.e 34, and 2291 hence pre-allocation is perfect. 2292 2293 Level: intermediate 2294 2295 .keywords: matrix, aij, compressed row, sparse, parallel 2296 2297 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2298 @*/ 2299 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 2300 { 2301 int ierr,size; 2302 2303 PetscFunctionBegin; 2304 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2305 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2306 if (size > 1) { 2307 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2308 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2309 } else { 2310 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2311 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2312 } 2313 PetscFunctionReturn(0); 2314 } 2315 2316 #undef __FUNCT__ 2317 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2318 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap) 2319 { 2320 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2321 PetscFunctionBegin; 2322 *Ad = a->A; 2323 *Ao = a->B; 2324 *colmap = a->garray; 2325 PetscFunctionReturn(0); 2326 } 2327 2328 #undef __FUNCT__ 2329 
#define __FUNCT__ "MatSetColoring_MPIAIJ"
/* Attaches a coloring to both sequential blocks of an MPIAIJ matrix.
   For IS_COLORING_LOCAL the colors are indexed by global column number;
   for IS_COLORING_GHOSTED they are indexed by local (ghosted) column number. */
int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_LOCAL) {
    int        *allcolors,*colors,i;
    ISColoring ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion: gather the colors of ALL
       columns so garray (global column of each local off-diagonal column)
       can be used to look them up */
    ierr = ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    /* NOTE(review): colors appears to be taken over by the ISColoring — confirm ownership */
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    int        *colors,i,*larray;
    ISColoring ocoloring;

    /* set coloring for diagonal portion: map the owned global columns
       cstart..cstart+n-1 through A->mapping to local ghosted indices,
       then pick the corresponding colors */
    ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      larray[i] = i + a->cstart;
    }
    /* in-place mapping: larray is both input (global) and output (local) */
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion: garray already holds the global
       column numbers, map them to local ghosted indices */
    ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
/* Forwards ADIC-computed values to both sequential blocks. */
int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
/* Forwards ADIFOR-computed values (nl derivatives per entry) to both
   sequential blocks. */
int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}