1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/ 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/spops.h" 6 7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat); 8 EXTERN int DisAssemble_MPIAIJ(Mat); 9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode); 10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**); 11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**); 12 EXTERN int MatPrintHelp_SeqAIJ(Mat); 13 EXTERN int MatUseSuperLU_DIST_MPIAIJ(Mat); 14 15 /* 16 Local utility routine that creates a mapping from the global column 17 number to the local number in the off-diagonal part of the local 18 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 19 a slightly higher hash table cost; without it it is not scalable (each processor 20 has an order N integer array but is fast to acess. 21 */ 22 #undef __FUNCT__ 23 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 24 int CreateColmap_MPIAIJ_Private(Mat mat) 25 { 26 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 27 int n = aij->B->n,i,ierr; 28 29 PetscFunctionBegin; 30 #if defined (PETSC_USE_CTABLE) 31 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 32 for (i=0; i<n; i++){ 33 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 34 } 35 #else 36 ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr); 37 PetscLogObjectMemory(mat,mat->N*sizeof(int)); 38 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr); 39 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 40 #endif 41 PetscFunctionReturn(0); 42 } 43 44 #define CHUNKSIZE 15 45 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 46 { \ 47 \ 48 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 49 rmax = aimax[row]; nrow = ailen[row]; \ 50 col1 = col - shift; \ 51 \ 52 low = 0; high = nrow; \ 53 while (high-low > 5) { \ 54 t = (low+high)/2; \ 55 if (rp[t] > col) high = t; \ 56 else low = 
t; \ 57 } \ 58 for (_i=low; _i<high; _i++) { \ 59 if (rp[_i] > col1) break; \ 60 if (rp[_i] == col1) { \ 61 if (addv == ADD_VALUES) ap[_i] += value; \ 62 else ap[_i] = value; \ 63 goto a_noinsert; \ 64 } \ 65 } \ 66 if (nonew == 1) goto a_noinsert; \ 67 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 68 if (nrow >= rmax) { \ 69 /* there is no extra room in row, therefore enlarge */ \ 70 int new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \ 71 PetscScalar *new_a; \ 72 \ 73 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 74 \ 75 /* malloc new storage space */ \ 76 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \ 77 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 78 new_j = (int*)(new_a + new_nz); \ 79 new_i = new_j + new_nz; \ 80 \ 81 /* copy over old data into new slots */ \ 82 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \ 83 for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 84 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 85 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 86 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 87 len*sizeof(int));CHKERRQ(ierr); \ 88 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 89 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 90 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 91 /* free up old matrix storage */ \ 92 \ 93 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 94 if (!a->singlemalloc) { \ 95 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 96 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 97 } \ 98 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 99 a->singlemalloc = PETSC_TRUE; \ 100 \ 101 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 102 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 103 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 104 
a->maxnz += CHUNKSIZE; \ 105 a->reallocs++; \ 106 } \ 107 N = nrow++ - 1; a->nz++; \ 108 /* shift up all the later entries in this row */ \ 109 for (ii=N; ii>=_i; ii--) { \ 110 rp[ii+1] = rp[ii]; \ 111 ap[ii+1] = ap[ii]; \ 112 } \ 113 rp[_i] = col1; \ 114 ap[_i] = value; \ 115 a_noinsert: ; \ 116 ailen[row] = nrow; \ 117 } 118 119 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 120 { \ 121 \ 122 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 123 rmax = bimax[row]; nrow = bilen[row]; \ 124 col1 = col - shift; \ 125 \ 126 low = 0; high = nrow; \ 127 while (high-low > 5) { \ 128 t = (low+high)/2; \ 129 if (rp[t] > col) high = t; \ 130 else low = t; \ 131 } \ 132 for (_i=low; _i<high; _i++) { \ 133 if (rp[_i] > col1) break; \ 134 if (rp[_i] == col1) { \ 135 if (addv == ADD_VALUES) ap[_i] += value; \ 136 else ap[_i] = value; \ 137 goto b_noinsert; \ 138 } \ 139 } \ 140 if (nonew == 1) goto b_noinsert; \ 141 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 142 if (nrow >= rmax) { \ 143 /* there is no extra room in row, therefore enlarge */ \ 144 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 145 PetscScalar *new_a; \ 146 \ 147 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 148 \ 149 /* malloc new storage space */ \ 150 len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \ 151 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 152 new_j = (int*)(new_a + new_nz); \ 153 new_i = new_j + new_nz; \ 154 \ 155 /* copy over old data into new slots */ \ 156 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 157 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 158 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 159 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 160 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 161 len*sizeof(int));CHKERRQ(ierr); \ 162 ierr = 
PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \ 163 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 164 len*sizeof(PetscScalar));CHKERRQ(ierr); \ 165 /* free up old matrix storage */ \ 166 \ 167 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 168 if (!b->singlemalloc) { \ 169 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 170 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 171 } \ 172 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 173 b->singlemalloc = PETSC_TRUE; \ 174 \ 175 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 176 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 177 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \ 178 b->maxnz += CHUNKSIZE; \ 179 b->reallocs++; \ 180 } \ 181 N = nrow++ - 1; b->nz++; \ 182 /* shift up all the later entries in this row */ \ 183 for (ii=N; ii>=_i; ii--) { \ 184 rp[ii+1] = rp[ii]; \ 185 ap[ii+1] = ap[ii]; \ 186 } \ 187 rp[_i] = col1; \ 188 ap[_i] = value; \ 189 b_noinsert: ; \ 190 bilen[row] = nrow; \ 191 } 192 193 #undef __FUNCT__ 194 #define __FUNCT__ "MatSetValues_MPIAIJ" 195 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 196 { 197 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 198 PetscScalar value; 199 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 200 int cstart = aij->cstart,cend = aij->cend,row,col; 201 PetscTruth roworiented = aij->roworiented; 202 203 /* Some Variables required in the macro */ 204 Mat A = aij->A; 205 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 206 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 207 PetscScalar *aa = a->a; 208 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 209 Mat B = aij->B; 210 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 211 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 212 PetscScalar *ba = b->a; 213 214 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 215 int 
nonew = a->nonew,shift = a->indexshift; 216 PetscScalar *ap; 217 218 PetscFunctionBegin; 219 for (i=0; i<m; i++) { 220 if (im[i] < 0) continue; 221 #if defined(PETSC_USE_BOPT_g) 222 if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 223 #endif 224 if (im[i] >= rstart && im[i] < rend) { 225 row = im[i] - rstart; 226 for (j=0; j<n; j++) { 227 if (in[j] >= cstart && in[j] < cend){ 228 col = in[j] - cstart; 229 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 230 if (ignorezeroentries && value == 0.0) continue; 231 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 232 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 233 } else if (in[j] < 0) continue; 234 #if defined(PETSC_USE_BOPT_g) 235 else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");} 236 #endif 237 else { 238 if (mat->was_assembled) { 239 if (!aij->colmap) { 240 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 241 } 242 #if defined (PETSC_USE_CTABLE) 243 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 244 col--; 245 #else 246 col = aij->colmap[in[j]] - 1; 247 #endif 248 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 249 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 250 col = in[j]; 251 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 252 B = aij->B; 253 b = (Mat_SeqAIJ*)B->data; 254 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 255 ba = b->a; 256 } 257 } else col = in[j]; 258 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 259 if (ignorezeroentries && value == 0.0) continue; 260 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 261 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 262 } 263 } 264 } else { 265 if (!aij->donotstash) { 266 if (roworiented) { 267 if (ignorezeroentries && v[i*n] == 0.0) continue; 268 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 269 } else { 270 if 
(ignorezeroentries && v[i] == 0.0) continue; 271 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 272 } 273 } 274 } 275 } 276 PetscFunctionReturn(0); 277 } 278 279 #undef __FUNCT__ 280 #define __FUNCT__ "MatGetValues_MPIAIJ" 281 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v) 282 { 283 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 284 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 285 int cstart = aij->cstart,cend = aij->cend,row,col; 286 287 PetscFunctionBegin; 288 for (i=0; i<m; i++) { 289 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 290 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 291 if (idxm[i] >= rstart && idxm[i] < rend) { 292 row = idxm[i] - rstart; 293 for (j=0; j<n; j++) { 294 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 295 if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 296 if (idxn[j] >= cstart && idxn[j] < cend){ 297 col = idxn[j] - cstart; 298 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 299 } else { 300 if (!aij->colmap) { 301 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 302 } 303 #if defined (PETSC_USE_CTABLE) 304 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 305 col --; 306 #else 307 col = aij->colmap[idxn[j]] - 1; 308 #endif 309 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 310 else { 311 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 312 } 313 } 314 } 315 } else { 316 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 317 } 318 } 319 PetscFunctionReturn(0); 320 } 321 322 #undef __FUNCT__ 323 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 324 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 325 { 326 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 327 int ierr,nstash,reallocs; 328 InsertMode addv; 329 330 PetscFunctionBegin; 331 if (aij->donotstash) { 332 PetscFunctionReturn(0); 333 } 

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}


/*
   Completes assembly: drains the stash (inserting values that arrived
   from other processes), assembles the diagonal block A, handles
   disassembly coordination, sets up the scatter for MatMult on final
   assembly, then assembles the off-diagonal block B.
*/
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  int         i,j,rstart,ncols,n,ierr,flg;
  int         *row,*col,other_disassembled;
  PetscScalar *val;
  InsertMode  addv = mat->insertmode;
#if defined(PETSC_HAVE_SUPERLUDIST)
  PetscTruth  flag;
#endif

  PetscFunctionBegin;
  if (!aij->donotstash) {
    /* receive and insert all stashed values sent to this process */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  /* cached row values (from MatGetRow) are stale after reassembly */
  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }
#if defined(PETSC_HAVE_SUPERLUDIST)
  ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); }
#endif
  PetscFunctionReturn(0);
}

/* Zeroes all entries by zeroing both the diagonal and off-diagonal blocks. */
#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Zeroes the (globally indexed) rows in is, optionally placing *diag on
   the diagonal. Each process may list rows owned by any process; the
   row indices are routed to their owners with nonblocking MPI sends and
   receives before the local rows are zeroed.
*/
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
{
  Mat_MPIAIJ  *l = (Mat_MPIAIJ*)A->data;
  int         i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int         *procs,*nprocs,j,idx,nsends,*work,row;
  int         nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int         *rvalues,tag = A->tag,count,base,slen,n,*source;
  int         *lens,imdex,*lrows,*values,rstart=l->rstart;
  MPI_Comm    comm = A->comm;
  MPI_Request *send_waits,*recv_waits;
  MPI_Status  recv_status,*send_status;
  IS          istmp;
  PetscTruth  found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs = nprocs + size;
  ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
  for (i=0; i<N; i++) {
    idx = rows[i];
    found = PETSC_FALSE;
    /* linear scan over the ownership ranges to find the owner of idx */
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  ierr   = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
  ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nrecvs = work[size+rank];
  nmax   = work[rank];
  ierr   = PetscFree(work);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  /* recompute starts (the pack loop above advanced them) */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for (i=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base; /* convert to local row index */
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use seperate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

     Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/*
   yy = A*xx: overlaps the scatter of off-process entries of xx into lvec
   with the diagonal-block product, then adds the off-diagonal-block
   contribution B*lvec.
*/
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* zz = A*xx + yy, with the same communication overlap as MatMult. */
#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* yy = A^T * xx: reverse-scatter accumulates off-diagonal contributions. */
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true.
 */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* zz = A^T * xx + yy, using the same reverse scatter as MatMultTranspose. */
#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Scales both local blocks by *aa (A = *aa * A). */
#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Frees the stash, both local blocks, the column map, and all scatter data. */
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
int MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Viewer implementation shared by ASCII, draw, socket and binary viewers.
   For INFO formats prints per-process statistics; otherwise gathers the
   entire matrix onto process 0 (expensive for large problems) and views
   it there through a singleton viewer.
*/
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ        *C = (Mat_SeqAIJ*)aij->A->data;
  int               ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,isascii,flg;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
      if (flg) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
               rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
               rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    if (!rank) {
      ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    /* temporarily shift column indices to global numbering */
    for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    /* restore the local column numbering */
    for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
    ct = cols;
    for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Dispatches to the shared viewer above for the supported viewer types. */
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (isascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}



/*
   Block-Jacobi style SOR/relaxation: each outer iteration scatters the
   current solution, updates the right-hand side bb1 = bb - B*x with the
   off-process coupling, and applies the sequential relax on the diagonal
   block. Only the SOR_LOCAL_* variants are supported.
*/
#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ*)matin->data;
  int         ierr;
  Vec         bb1;
  PetscScalar mone=-1.0;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = 
(*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 868 869 /* local sweep */ 870 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx); 871 CHKERRQ(ierr); 872 } 873 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 874 if (flag & SOR_ZERO_INITIAL_GUESS) { 875 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 876 its--; 877 } 878 while (its--) { 879 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 880 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 881 882 /* update rhs: bb1 = bb - B*x */ 883 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 884 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 885 886 /* local sweep */ 887 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 888 CHKERRQ(ierr); 889 } 890 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 891 if (flag & SOR_ZERO_INITIAL_GUESS) { 892 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr); 893 its--; 894 } 895 while (its--) { 896 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 897 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 898 899 /* update rhs: bb1 = bb - B*x */ 900 ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr); 901 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 902 903 /* local sweep */ 904 ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx); 905 CHKERRQ(ierr); 906 } 907 } else { 908 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 909 } 910 911 ierr = VecDestroy(bb1);CHKERRQ(ierr); 912 PetscFunctionReturn(0); 913 } 914 915 #undef __FUNCT__ 916 #define __FUNCT__ "MatGetInfo_MPIAIJ" 917 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 918 { 919 Mat_MPIAIJ *mat = 
(Mat_MPIAIJ*)matin->data; 920 Mat A = mat->A,B = mat->B; 921 int ierr; 922 PetscReal isend[5],irecv[5]; 923 924 PetscFunctionBegin; 925 info->block_size = 1.0; 926 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 927 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 928 isend[3] = info->memory; isend[4] = info->mallocs; 929 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 930 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 931 isend[3] += info->memory; isend[4] += info->mallocs; 932 if (flag == MAT_LOCAL) { 933 info->nz_used = isend[0]; 934 info->nz_allocated = isend[1]; 935 info->nz_unneeded = isend[2]; 936 info->memory = isend[3]; 937 info->mallocs = isend[4]; 938 } else if (flag == MAT_GLOBAL_MAX) { 939 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr); 940 info->nz_used = irecv[0]; 941 info->nz_allocated = irecv[1]; 942 info->nz_unneeded = irecv[2]; 943 info->memory = irecv[3]; 944 info->mallocs = irecv[4]; 945 } else if (flag == MAT_GLOBAL_SUM) { 946 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr); 947 info->nz_used = irecv[0]; 948 info->nz_allocated = irecv[1]; 949 info->nz_unneeded = irecv[2]; 950 info->memory = irecv[3]; 951 info->mallocs = irecv[4]; 952 } 953 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 954 info->fill_ratio_needed = 0; 955 info->factor_mallocs = 0; 956 info->rows_global = (double)matin->M; 957 info->columns_global = (double)matin->N; 958 info->rows_local = (double)matin->m; 959 info->columns_local = (double)matin->N; 960 961 PetscFunctionReturn(0); 962 } 963 964 #undef __FUNCT__ 965 #define __FUNCT__ "MatSetOption_MPIAIJ" 966 int MatSetOption_MPIAIJ(Mat A,MatOption op) 967 { 968 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 969 int ierr; 970 971 PetscFunctionBegin; 972 switch (op) { 973 case MAT_NO_NEW_NONZERO_LOCATIONS: 974 case MAT_YES_NEW_NONZERO_LOCATIONS: 975 case 
MAT_COLUMNS_UNSORTED: 976 case MAT_COLUMNS_SORTED: 977 case MAT_NEW_NONZERO_ALLOCATION_ERR: 978 case MAT_KEEP_ZEROED_ROWS: 979 case MAT_NEW_NONZERO_LOCATION_ERR: 980 case MAT_USE_INODES: 981 case MAT_DO_NOT_USE_INODES: 982 case MAT_IGNORE_ZERO_ENTRIES: 983 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 984 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 985 break; 986 case MAT_ROW_ORIENTED: 987 a->roworiented = PETSC_TRUE; 988 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 989 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 990 break; 991 case MAT_ROWS_SORTED: 992 case MAT_ROWS_UNSORTED: 993 case MAT_YES_NEW_DIAGONALS: 994 case MAT_USE_SINGLE_PRECISION_SOLVES: 995 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 996 break; 997 case MAT_COLUMN_ORIENTED: 998 a->roworiented = PETSC_FALSE; 999 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1000 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1001 break; 1002 case MAT_IGNORE_OFF_PROC_ENTRIES: 1003 a->donotstash = PETSC_TRUE; 1004 break; 1005 case MAT_NO_NEW_DIAGONALS: 1006 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1007 default: 1008 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1009 } 1010 PetscFunctionReturn(0); 1011 } 1012 1013 #undef __FUNCT__ 1014 #define __FUNCT__ "MatGetRow_MPIAIJ" 1015 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v) 1016 { 1017 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1018 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1019 int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1020 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1021 int *cmap,*idx_p; 1022 1023 PetscFunctionBegin; 1024 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1025 mat->getrowactive = PETSC_TRUE; 1026 1027 if (!mat->rowvalues && (idx || v)) { 1028 /* 1029 allocate enough space to hold information from the longest row. 
1030 */ 1031 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1032 int max = 1,tmp; 1033 for (i=0; i<matin->m; i++) { 1034 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1035 if (max < tmp) { max = tmp; } 1036 } 1037 ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr); 1038 mat->rowindices = (int*)(mat->rowvalues + max); 1039 } 1040 1041 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1042 lrow = row - rstart; 1043 1044 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1045 if (!v) {pvA = 0; pvB = 0;} 1046 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1047 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1048 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1049 nztot = nzA + nzB; 1050 1051 cmap = mat->garray; 1052 if (v || idx) { 1053 if (nztot) { 1054 /* Sort by increasing column numbers, assuming A and B already sorted */ 1055 int imark = -1; 1056 if (v) { 1057 *v = v_p = mat->rowvalues; 1058 for (i=0; i<nzB; i++) { 1059 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1060 else break; 1061 } 1062 imark = i; 1063 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1064 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1065 } 1066 if (idx) { 1067 *idx = idx_p = mat->rowindices; 1068 if (imark > -1) { 1069 for (i=0; i<imark; i++) { 1070 idx_p[i] = cmap[cworkB[i]]; 1071 } 1072 } else { 1073 for (i=0; i<nzB; i++) { 1074 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1075 else break; 1076 } 1077 imark = i; 1078 } 1079 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1080 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1081 } 1082 } else { 1083 if (idx) *idx = 0; 1084 if (v) *v = 0; 1085 } 1086 } 1087 *nz = nztot; 1088 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1089 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1090 
PetscFunctionReturn(0); 1091 } 1092 1093 #undef __FUNCT__ 1094 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1095 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v) 1096 { 1097 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1098 1099 PetscFunctionBegin; 1100 if (aij->getrowactive == PETSC_FALSE) { 1101 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1102 } 1103 aij->getrowactive = PETSC_FALSE; 1104 PetscFunctionReturn(0); 1105 } 1106 1107 #undef __FUNCT__ 1108 #define __FUNCT__ "MatNorm_MPIAIJ" 1109 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1110 { 1111 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1112 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1113 int ierr,i,j,cstart = aij->cstart,shift = amat->indexshift; 1114 PetscReal sum = 0.0; 1115 PetscScalar *v; 1116 1117 PetscFunctionBegin; 1118 if (aij->size == 1) { 1119 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1120 } else { 1121 if (type == NORM_FROBENIUS) { 1122 v = amat->a; 1123 for (i=0; i<amat->nz; i++) { 1124 #if defined(PETSC_USE_COMPLEX) 1125 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1126 #else 1127 sum += (*v)*(*v); v++; 1128 #endif 1129 } 1130 v = bmat->a; 1131 for (i=0; i<bmat->nz; i++) { 1132 #if defined(PETSC_USE_COMPLEX) 1133 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1134 #else 1135 sum += (*v)*(*v); v++; 1136 #endif 1137 } 1138 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1139 *norm = sqrt(*norm); 1140 } else if (type == NORM_1) { /* max column norm */ 1141 PetscReal *tmp,*tmp2; 1142 int *jj,*garray = aij->garray; 1143 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1144 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1145 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1146 *norm = 0.0; 1147 v = amat->a; jj = amat->j; 1148 for (j=0; j<amat->nz; j++) { 1149 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1150 } 1151 v 
= bmat->a; jj = bmat->j; 1152 for (j=0; j<bmat->nz; j++) { 1153 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1154 } 1155 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr); 1156 for (j=0; j<mat->N; j++) { 1157 if (tmp2[j] > *norm) *norm = tmp2[j]; 1158 } 1159 ierr = PetscFree(tmp);CHKERRQ(ierr); 1160 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1161 } else if (type == NORM_INFINITY) { /* max row norm */ 1162 PetscReal ntemp = 0.0; 1163 for (j=0; j<aij->A->m; j++) { 1164 v = amat->a + amat->i[j] + shift; 1165 sum = 0.0; 1166 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1167 sum += PetscAbsScalar(*v); v++; 1168 } 1169 v = bmat->a + bmat->i[j] + shift; 1170 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1171 sum += PetscAbsScalar(*v); v++; 1172 } 1173 if (sum > ntemp) ntemp = sum; 1174 } 1175 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr); 1176 } else { 1177 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1178 } 1179 } 1180 PetscFunctionReturn(0); 1181 } 1182 1183 #undef __FUNCT__ 1184 #define __FUNCT__ "MatTranspose_MPIAIJ" 1185 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1186 { 1187 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1188 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1189 int ierr,shift = Aloc->indexshift; 1190 int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1191 Mat B; 1192 PetscScalar *array; 1193 1194 PetscFunctionBegin; 1195 if (!matout && M != N) { 1196 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1197 } 1198 1199 ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1200 1201 /* copy over the A part */ 1202 Aloc = (Mat_SeqAIJ*)a->A->data; 1203 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1204 row = a->rstart; 1205 for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;} 1206 for (i=0; i<m; i++) { 1207 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1208 row++; array += ai[i+1]-ai[i]; aj += 
ai[i+1]-ai[i]; 1209 } 1210 aj = Aloc->j; 1211 for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;} 1212 1213 /* copy over the B part */ 1214 Aloc = (Mat_SeqAIJ*)a->B->data; 1215 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1216 row = a->rstart; 1217 ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr); 1218 ct = cols; 1219 for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];} 1220 for (i=0; i<m; i++) { 1221 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1222 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1223 } 1224 ierr = PetscFree(ct);CHKERRQ(ierr); 1225 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1226 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1227 if (matout) { 1228 *matout = B; 1229 } else { 1230 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1231 } 1232 PetscFunctionReturn(0); 1233 } 1234 1235 #undef __FUNCT__ 1236 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1237 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1238 { 1239 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1240 Mat a = aij->A,b = aij->B; 1241 int ierr,s1,s2,s3; 1242 1243 PetscFunctionBegin; 1244 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1245 if (rr) { 1246 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1247 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1248 /* Overlap communication with computation. 
*/ 1249 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1250 } 1251 if (ll) { 1252 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1253 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1254 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1255 } 1256 /* scale the diagonal block */ 1257 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1258 1259 if (rr) { 1260 /* Do a scatter end and then right scale the off-diagonal block */ 1261 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1262 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1263 } 1264 1265 PetscFunctionReturn(0); 1266 } 1267 1268 1269 #undef __FUNCT__ 1270 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1271 int MatPrintHelp_MPIAIJ(Mat A) 1272 { 1273 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1274 int ierr; 1275 1276 PetscFunctionBegin; 1277 if (!a->rank) { 1278 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1279 } 1280 PetscFunctionReturn(0); 1281 } 1282 1283 #undef __FUNCT__ 1284 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1285 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1286 { 1287 PetscFunctionBegin; 1288 *bs = 1; 1289 PetscFunctionReturn(0); 1290 } 1291 #undef __FUNCT__ 1292 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1293 int MatSetUnfactored_MPIAIJ(Mat A) 1294 { 1295 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1296 int ierr; 1297 1298 PetscFunctionBegin; 1299 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1300 PetscFunctionReturn(0); 1301 } 1302 1303 #undef __FUNCT__ 1304 #define __FUNCT__ "MatEqual_MPIAIJ" 1305 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1306 { 1307 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1308 Mat a,b,c,d; 1309 PetscTruth flg; 1310 int ierr; 1311 1312 PetscFunctionBegin; 1313 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1314 if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type"); 1315 a = 
matA->A; b = matA->B; 1316 c = matB->A; d = matB->B; 1317 1318 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1319 if (flg == PETSC_TRUE) { 1320 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1321 } 1322 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1323 PetscFunctionReturn(0); 1324 } 1325 1326 #undef __FUNCT__ 1327 #define __FUNCT__ "MatCopy_MPIAIJ" 1328 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1329 { 1330 int ierr; 1331 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1332 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1333 PetscTruth flg; 1334 1335 PetscFunctionBegin; 1336 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1337 if (str != SAME_NONZERO_PATTERN || !flg) { 1338 /* because of the column compression in the off-processor part of the matrix a->B, 1339 the number of columns in a->B and b->B may be different, hence we cannot call 1340 the MatCopy() directly on the two parts. If need be, we can provide a more 1341 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1342 then copying the submatrices */ 1343 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1344 } else { 1345 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1346 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1347 } 1348 PetscFunctionReturn(0); 1349 } 1350 1351 #undef __FUNCT__ 1352 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1353 int MatSetUpPreallocation_MPIAIJ(Mat A) 1354 { 1355 int ierr; 1356 1357 PetscFunctionBegin; 1358 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1359 PetscFunctionReturn(0); 1360 } 1361 1362 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1363 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int); 1364 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1365 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **); 1366 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *); 1367 #if !defined(PETSC_USE_COMPLEX) && 
!defined(PETSC_USE_SINGLE) 1368 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*); 1369 #endif 1370 1371 #include "petscblaslapack.h" 1372 1373 #undef __FUNCT__ 1374 #define __FUNCT__ "MatAXPY_MPIAIJ" 1375 int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str) 1376 { 1377 int ierr,one; 1378 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 1379 Mat_SeqAIJ *x,*y; 1380 1381 PetscFunctionBegin; 1382 if (str == SAME_NONZERO_PATTERN) { 1383 x = (Mat_SeqAIJ *)xx->A->data; 1384 y = (Mat_SeqAIJ *)yy->A->data; 1385 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1386 x = (Mat_SeqAIJ *)xx->B->data; 1387 y = (Mat_SeqAIJ *)yy->B->data; 1388 BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 1389 } else { 1390 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1391 } 1392 PetscFunctionReturn(0); 1393 } 1394 1395 /* -------------------------------------------------------------------*/ 1396 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1397 MatGetRow_MPIAIJ, 1398 MatRestoreRow_MPIAIJ, 1399 MatMult_MPIAIJ, 1400 MatMultAdd_MPIAIJ, 1401 MatMultTranspose_MPIAIJ, 1402 MatMultTransposeAdd_MPIAIJ, 1403 0, 1404 0, 1405 0, 1406 0, 1407 0, 1408 0, 1409 MatRelax_MPIAIJ, 1410 MatTranspose_MPIAIJ, 1411 MatGetInfo_MPIAIJ, 1412 MatEqual_MPIAIJ, 1413 MatGetDiagonal_MPIAIJ, 1414 MatDiagonalScale_MPIAIJ, 1415 MatNorm_MPIAIJ, 1416 MatAssemblyBegin_MPIAIJ, 1417 MatAssemblyEnd_MPIAIJ, 1418 0, 1419 MatSetOption_MPIAIJ, 1420 MatZeroEntries_MPIAIJ, 1421 MatZeroRows_MPIAIJ, 1422 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) 1423 MatLUFactorSymbolic_MPIAIJ_TFS, 1424 #else 1425 0, 1426 #endif 1427 0, 1428 0, 1429 0, 1430 MatSetUpPreallocation_MPIAIJ, 1431 0, 1432 0, 1433 0, 1434 0, 1435 MatDuplicate_MPIAIJ, 1436 0, 1437 0, 1438 0, 1439 0, 1440 MatAXPY_MPIAIJ, 1441 MatGetSubMatrices_MPIAIJ, 1442 MatIncreaseOverlap_MPIAIJ, 1443 MatGetValues_MPIAIJ, 1444 MatCopy_MPIAIJ, 1445 MatPrintHelp_MPIAIJ, 1446 MatScale_MPIAIJ, 1447 0, 1448 0, 1449 0, 1450 
MatGetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
       MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       0,
       0,
       MatGetSubMatrix_MPIAIJ,
       MatDestroy_MPIAIJ,
       MatView_MPIAIJ,
       MatGetPetscMaps_Petsc,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       0,
       MatSetColoring_MPIAIJ,
       MatSetValuesAdic_MPIAIJ,
       MatSetValuesAdifor_MPIAIJ
};

/* ----------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIAIJ"
/* MatStoreValues_MPIAIJ - Saves a copy of the numerical values of both
   sequential blocks so they can later be restored with MatRetrieveValues. */
int MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIAIJ"
/* MatRetrieveValues_MPIAIJ - Restores the values saved by MatStoreValues_MPIAIJ. */
int MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#include "petscpc.h"
EXTERN_C_BEGIN
EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
/*
   MatCreate_MPIAIJ - Type constructor registered for MATMPIAIJ.  Installs
   the function table, splits row/column ownership across the communicator,
   builds the rowners/cowners ownership-range tables via MPI_Allgather +
   prefix sum, and initializes the stash and all cached fields.
*/
int MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  int        ierr,i,size;
#if defined(PETSC_HAVE_SUPERLUDIST)
  PetscTruth flg;
#endif

  PetscFunctionBegin;
  ierr            = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);

  ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data         = (void*)b;
  ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
  ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->factor       = 0;
  B->assembled    = PETSC_FALSE;
  B->mapping      = 0;

  B->insertmode   = NOT_SET_VALUES;
  b->size         = size;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);

  /* fill in any PETSC_DECIDE local/global sizes */
  ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr          = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners    = b->rowners + b->size + 2;
  /* rowners[i+1] receives proc i's local row count, then a prefix sum
     (starting at i=2; rowners[1] is already correct) turns it into
     ownership offsets */
  ierr          = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  /* same construction for column ownership */
  ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec      = PETSC_NULL;
  b->Mvctx     = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

#if defined(PETSC_HAVE_SUPERLUDIST)
  ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_superlu_dist",&flg);CHKERRQ(ierr);
  if (flg) { ierr = MatUseSuperLU_DIST_MPIAIJ(B);CHKERRQ(ierr); }
#endif

  /* register composed operations discoverable by name */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIAIJ",
                                     MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIAIJ",
                                     MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIAIJ",
                                     MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
/*
   MatDuplicate_MPIAIJ - Deep-copies an assembled MPIAIJ matrix: ownership
   tables, colmap/garray lookup structures, the ghost vector and scatter
   context, and (per cpvalues) the two sequential blocks.
*/
int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  int        ierr;

  PetscFunctionBegin;
  *newmat       = 0;
  ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
  a    = (Mat_MPIAIJ*)mat->data;
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = matin->factor;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* MatGetRow() work space is lazily allocated, not copied */
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  /* copies both rowners and the adjacent cowners (one allocation) */
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    int len;
    len  = oldmat->B->n;
    ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,len*sizeof(int));
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->lvec);
  ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->Mvctx);
  ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->B);
  ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
/*
   MatLoad_MPIAIJ - Loads an MPIAIJ matrix from a PETSc binary viewer.
   Rank 0 reads the header, row lengths, column indices and values from the
   file; row lengths are scattered, then each remote process receives its
   column-index and value segments by point-to-point messages.  Diagonal vs
   off-diagonal counts are computed locally so the new matrix can be
   preallocated exactly before the values are inserted.
*/
int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  PetscScalar  *vals,*svals;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          i,nz,ierr,j,rstart,rend,fd;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  /* prefix sum from i=2 (rowners[1] already holds proc 0's count) */
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);   /* second half of the same allocation */
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j< rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);

    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix: ourlens is temporarily reduced to the diagonal-block
     counts for preallocation, then restored to full row lengths */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__  
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
/*
    MatGetSubMatrix_MPIAIJ - extracts the parallel submatrix mat(isrow,iscol),
  returning it as a new parallel MPIAIJ matrix distributed over mat's
  communicator.

    Not great since it makes two copies of the submatrix, first an SeqAIJ 
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ()

  Input:
    mat    - the parallel matrix
    isrow  - rows to extract on this processor
    iscol  - columns to extract (same on all processors)
    csize  - number of local columns of the result, or PETSC_DECIDE
    call   - MAT_INITIAL_MATRIX, or MAT_REUSE_MATRIX to refill *newmat
  Output:
    newmat - the assembled parallel submatrix
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
{
  int         ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  int         *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend;
  Mat         *local,M,Mreuse;
  PetscScalar *vwork,*aa;
  MPI_Comm    comm = mat->comm;
  Mat_SeqAIJ  *aij;


  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call ==  MAT_REUSE_MATRIX) {
    /* the sequential copy built by a previous MAT_INITIAL_MATRIX call was
       composed onto the result under the name "SubMatrix"; refill it in place */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    /* build a fresh sequential copy of the requested rows/columns; keep the
       matrix but free the one-element array MatGetSubMatrices() allocated */
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /* 
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal 
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      /* spread the n columns as evenly as possible; the first n%size
         processors get one extra column */
      nlocal = n/size + ((n % size) > rank);
    } else {
      nlocal = csize;
    }
    /* prefix-sum of local column counts gives this processor's column range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ(1,"Local column sizes do not add up to total number of columns");
    }

    /* next, compute all the lengths: dlens/olens share one allocation,
       olens being the second half */
    ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        /* columns outside [rstart,rend) belong to the off-diagonal block */
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    int ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  /* walk the CSR rows of the sequential copy and insert each one into the
     corresponding global row rstart+i of the parallel matrix */
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request; the dereference hands
     ownership of Mreuse to M so it is destroyed with M */
  if (call ==  MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__  
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters 
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix 
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the 
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure. 
1990 The size of this array is equal to the number of local rows, i.e 'm'. 1991 You must leave room for the diagonal entry even if it is zero. 1992 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 1993 submatrix (same value is used for all local rows). 1994 - o_nnz - array containing the number of nonzeros in the various rows of the 1995 OFF-DIAGONAL portion of the local submatrix (possibly different for 1996 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 1997 structure. The size of this array is equal to the number 1998 of local rows, i.e 'm'. 1999 2000 The AIJ format (also called the Yale sparse matrix format or 2001 compressed row storage), is fully compatible with standard Fortran 77 2002 storage. That is, the stored row and column indices can begin at 2003 either one (as in Fortran) or zero. See the users manual for details. 2004 2005 The user MUST specify either the local or global matrix dimensions 2006 (possibly both). 2007 2008 The parallel matrix is partitioned such that the first m0 rows belong to 2009 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2010 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2011 2012 The DIAGONAL portion of the local submatrix of a processor can be defined 2013 as the submatrix which is obtained by extraction the part corresponding 2014 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2015 first row that belongs to the processor, and r2 is the last row belonging 2016 to the this processor. This is a square mxm matrix. The remaining portion 2017 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2018 2019 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2020 2021 By default, this format uses inodes (identical nodes) when possible. 2022 We search for consecutive rows with the same nonzero structure, thereby 2023 reusing matrix information to achieve increased efficiency. 
2024 2025 Options Database Keys: 2026 + -mat_aij_no_inode - Do not use inodes 2027 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2028 - -mat_aij_oneindex - Internally use indexing starting at 1 2029 rather than 0. Note that when calling MatSetValues(), 2030 the user still MUST index entries starting at 0! 2031 2032 Example usage: 2033 2034 Consider the following 8x8 matrix with 34 non-zero values, that is 2035 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2036 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2037 as follows: 2038 2039 .vb 2040 1 2 0 | 0 3 0 | 0 4 2041 Proc0 0 5 6 | 7 0 0 | 8 0 2042 9 0 10 | 11 0 0 | 12 0 2043 ------------------------------------- 2044 13 0 14 | 15 16 17 | 0 0 2045 Proc1 0 18 0 | 19 20 21 | 0 0 2046 0 0 0 | 22 23 0 | 24 0 2047 ------------------------------------- 2048 Proc2 25 26 27 | 0 0 28 | 29 0 2049 30 0 0 | 31 32 33 | 0 34 2050 .ve 2051 2052 This can be represented as a collection of submatrices as: 2053 2054 .vb 2055 A B C 2056 D E F 2057 G H I 2058 .ve 2059 2060 Where the submatrices A,B,C are owned by proc0, D,E,F are 2061 owned by proc1, G,H,I are owned by proc2. 2062 2063 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2064 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2065 The 'M','N' parameters are 8,8, and have the same values on all procs. 2066 2067 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2068 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2069 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2070 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2071 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2072 matrix, ans [DF] as another SeqAIJ matrix. 
2073 2074 When d_nz, o_nz parameters are specified, d_nz storage elements are 2075 allocated for every row of the local diagonal submatrix, and o_nz 2076 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2077 One way to choose d_nz and o_nz is to use the max nonzerors per local 2078 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 2079 In this case, the values of d_nz,o_nz are: 2080 .vb 2081 proc0 : dnz = 2, o_nz = 2 2082 proc1 : dnz = 3, o_nz = 2 2083 proc2 : dnz = 1, o_nz = 4 2084 .ve 2085 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2086 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2087 for proc3. i.e we are using 12+15+10=37 storage locations to store 2088 34 values. 2089 2090 When d_nnz, o_nnz parameters are specified, the storage is specified 2091 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2092 In the above case the values for d_nnz,o_nnz are: 2093 .vb 2094 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2095 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2096 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2097 .ve 2098 Here the space allocated is sum of all the above values i.e 34, and 2099 hence pre-allocation is perfect. 
2100 2101 Level: intermediate 2102 2103 .keywords: matrix, aij, compressed row, sparse, parallel 2104 2105 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2106 @*/ 2107 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 2108 { 2109 Mat_MPIAIJ *b; 2110 int ierr,i; 2111 PetscTruth flg2; 2112 2113 PetscFunctionBegin; 2114 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr); 2115 if (!flg2) PetscFunctionReturn(0); 2116 B->preallocated = PETSC_TRUE; 2117 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2118 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2119 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 2120 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 2121 if (d_nnz) { 2122 for (i=0; i<B->m; i++) { 2123 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 2124 } 2125 } 2126 if (o_nnz) { 2127 for (i=0; i<B->m; i++) { 2128 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 2129 } 2130 } 2131 b = (Mat_MPIAIJ*)B->data; 2132 2133 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2134 PetscLogObjectParent(B,b->A); 2135 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2136 PetscLogObjectParent(B,b->B); 2137 2138 PetscFunctionReturn(0); 2139 } 2140 2141 #undef __FUNCT__ 2142 #define __FUNCT__ "MatCreateMPIAIJ" 2143 /*@C 2144 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2145 (the default parallel PETSc format). For good matrix assembly performance 2146 the user should preallocate the matrix storage by setting the parameters 2147 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2148 performance can be increased by more than a factor of 50. 
2149 2150 Collective on MPI_Comm 2151 2152 Input Parameters: 2153 + comm - MPI communicator 2154 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2155 This value should be the same as the local size used in creating the 2156 y vector for the matrix-vector product y = Ax. 2157 . n - This value should be the same as the local size used in creating the 2158 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2159 calculated if N is given) For square matrices n is almost always m. 2160 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2161 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2162 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2163 (same value is used for all local rows) 2164 . d_nnz - array containing the number of nonzeros in the various rows of the 2165 DIAGONAL portion of the local submatrix (possibly different for each row) 2166 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2167 The size of this array is equal to the number of local rows, i.e 'm'. 2168 You must leave room for the diagonal entry even if it is zero. 2169 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2170 submatrix (same value is used for all local rows). 2171 - o_nnz - array containing the number of nonzeros in the various rows of the 2172 OFF-DIAGONAL portion of the local submatrix (possibly different for 2173 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2174 structure. The size of this array is equal to the number 2175 of local rows, i.e 'm'. 2176 2177 Output Parameter: 2178 . A - the matrix 2179 2180 Notes: 2181 m,n,M,N parameters specify the size of the matrix, and its partitioning across 2182 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 2183 storage requirements for this matrix. 
2184 2185 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 2186 processor than it must be used on all processors that share the object for 2187 that argument. 2188 2189 The AIJ format (also called the Yale sparse matrix format or 2190 compressed row storage), is fully compatible with standard Fortran 77 2191 storage. That is, the stored row and column indices can begin at 2192 either one (as in Fortran) or zero. See the users manual for details. 2193 2194 The user MUST specify either the local or global matrix dimensions 2195 (possibly both). 2196 2197 The parallel matrix is partitioned such that the first m0 rows belong to 2198 process 0, the next m1 rows belong to process 1, the next m2 rows belong 2199 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 2200 2201 The DIAGONAL portion of the local submatrix of a processor can be defined 2202 as the submatrix which is obtained by extraction the part corresponding 2203 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 2204 first row that belongs to the processor, and r2 is the last row belonging 2205 to the this processor. This is a square mxm matrix. The remaining portion 2206 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 2207 2208 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 2209 2210 By default, this format uses inodes (identical nodes) when possible. 2211 We search for consecutive rows with the same nonzero structure, thereby 2212 reusing matrix information to achieve increased efficiency. 2213 2214 Options Database Keys: 2215 + -mat_aij_no_inode - Do not use inodes 2216 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 2217 - -mat_aij_oneindex - Internally use indexing starting at 1 2218 rather than 0. Note that when calling MatSetValues(), 2219 the user still MUST index entries starting at 0! 
2220 2221 2222 Example usage: 2223 2224 Consider the following 8x8 matrix with 34 non-zero values, that is 2225 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 2226 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 2227 as follows: 2228 2229 .vb 2230 1 2 0 | 0 3 0 | 0 4 2231 Proc0 0 5 6 | 7 0 0 | 8 0 2232 9 0 10 | 11 0 0 | 12 0 2233 ------------------------------------- 2234 13 0 14 | 15 16 17 | 0 0 2235 Proc1 0 18 0 | 19 20 21 | 0 0 2236 0 0 0 | 22 23 0 | 24 0 2237 ------------------------------------- 2238 Proc2 25 26 27 | 0 0 28 | 29 0 2239 30 0 0 | 31 32 33 | 0 34 2240 .ve 2241 2242 This can be represented as a collection of submatrices as: 2243 2244 .vb 2245 A B C 2246 D E F 2247 G H I 2248 .ve 2249 2250 Where the submatrices A,B,C are owned by proc0, D,E,F are 2251 owned by proc1, G,H,I are owned by proc2. 2252 2253 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2254 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 2255 The 'M','N' parameters are 8,8, and have the same values on all procs. 2256 2257 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 2258 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 2259 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 2260 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 2261 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 2262 matrix, ans [DF] as another SeqAIJ matrix. 2263 2264 When d_nz, o_nz parameters are specified, d_nz storage elements are 2265 allocated for every row of the local diagonal submatrix, and o_nz 2266 storage locations are allocated for every row of the OFF-DIAGONAL submat. 2267 One way to choose d_nz and o_nz is to use the max nonzerors per local 2268 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 
2269 In this case, the values of d_nz,o_nz are: 2270 .vb 2271 proc0 : dnz = 2, o_nz = 2 2272 proc1 : dnz = 3, o_nz = 2 2273 proc2 : dnz = 1, o_nz = 4 2274 .ve 2275 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 2276 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 2277 for proc3. i.e we are using 12+15+10=37 storage locations to store 2278 34 values. 2279 2280 When d_nnz, o_nnz parameters are specified, the storage is specified 2281 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 2282 In the above case the values for d_nnz,o_nnz are: 2283 .vb 2284 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 2285 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 2286 proc2: d_nnz = [1,1] and o_nnz = [4,4] 2287 .ve 2288 Here the space allocated is sum of all the above values i.e 34, and 2289 hence pre-allocation is perfect. 2290 2291 Level: intermediate 2292 2293 .keywords: matrix, aij, compressed row, sparse, parallel 2294 2295 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2296 @*/ 2297 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 2298 { 2299 int ierr,size; 2300 2301 PetscFunctionBegin; 2302 ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr); 2303 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2304 if (size > 1) { 2305 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 2306 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 2307 } else { 2308 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 2309 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 2310 } 2311 PetscFunctionReturn(0); 2312 } 2313 2314 #undef __FUNCT__ 2315 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 2316 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap) 2317 { 2318 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2319 PetscFunctionBegin; 2320 *Ad = a->A; 2321 *Ao = a->B; 2322 *colmap = a->garray; 2323 PetscFunctionReturn(0); 2324 } 2325 2326 #undef __FUNCT__ 2327 
#define __FUNCT__ "MatSetColoring_MPIAIJ"
/*
    MatSetColoring_MPIAIJ - attaches a coloring to both sequential pieces
  (diagonal block a->A and off-diagonal block a->B) of an MPIAIJ matrix by
  translating the given coloring into a local coloring for each piece.
*/
int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;  

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_LOCAL) {
    int        *allcolors,*colors,i;
    ISColoring ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion: gather the colors of ALL global
       columns, then pick out the ones for our ghost columns via garray
       (NOTE(review): not scalable in memory -- allcolors is global-sized) */
    ierr = ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    /* colors is handed to the ISColoring here and not freed in this routine --
       presumably ownership transfers to ocoloring; matches the usage below */
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    int        *colors,i,*larray;
    ISColoring ocoloring;

    /* set coloring for diagonal portion: map our owned global columns
       (cstart..cstart+n-1) through A->mapping to ghosted-local indices,
       then look up their colors */
    ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      larray[i] = i + a->cstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion: the ghost columns' global
       numbers are already in garray; map them to ghosted-local indices */
    ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__  
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
/*
    MatSetValuesAdic_MPIAIJ - forwards ADIC-computed values to both the
  diagonal and off-diagonal sequential blocks of the MPIAIJ matrix.
*/
int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;  
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__  
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
/*
    MatSetValuesAdifor_MPIAIJ - forwards ADIFOR-computed values (nl values
  per entry) to both sequential blocks of the MPIAIJ matrix.
*/
int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;  
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}