/*$Id: mpiaij.c,v 1.336 2001/06/01 14:57:20 bsmith Exp bsmith $*/

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
EXTERN int DisAssemble_MPIAIJ(Mat);
EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
EXTERN int MatPrintHelp_SeqAIJ(Mat);

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is defined this is scalable,
  at a slightly higher hash-table lookup cost; without it, it is not scalable
  (each processor holds an order-N integer array) but access is fast.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        n = aij->B->n,i,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(Scalar))+(am+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
             len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
             len*sizeof(Scalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr =
PetscFree(a->i);CHKERRQ(ierr); \ 95 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 96 } \ 97 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 98 a->singlemalloc = PETSC_TRUE; \ 99 \ 100 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 101 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 102 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 103 a->maxnz += CHUNKSIZE; \ 104 a->reallocs++; \ 105 } \ 106 N = nrow++ - 1; a->nz++; \ 107 /* shift up all the later entries in this row */ \ 108 for (ii=N; ii>=_i; ii--) { \ 109 rp[ii+1] = rp[ii]; \ 110 ap[ii+1] = ap[ii]; \ 111 } \ 112 rp[_i] = col1; \ 113 ap[_i] = value; \ 114 a_noinsert: ; \ 115 ailen[row] = nrow; \ 116 } 117 118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 119 { \ 120 \ 121 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 122 rmax = bimax[row]; nrow = bilen[row]; \ 123 col1 = col - shift; \ 124 \ 125 low = 0; high = nrow; \ 126 while (high-low > 5) { \ 127 t = (low+high)/2; \ 128 if (rp[t] > col) high = t; \ 129 else low = t; \ 130 } \ 131 for (_i=low; _i<high; _i++) { \ 132 if (rp[_i] > col1) break; \ 133 if (rp[_i] == col1) { \ 134 if (addv == ADD_VALUES) ap[_i] += value; \ 135 else ap[_i] = value; \ 136 goto b_noinsert; \ 137 } \ 138 } \ 139 if (nonew == 1) goto b_noinsert; \ 140 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 141 if (nrow >= rmax) { \ 142 /* there is no extra room in row, therefore enlarge */ \ 143 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 144 Scalar *new_a; \ 145 \ 146 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 147 \ 148 /* malloc new storage space */ \ 149 len = new_nz*(sizeof(int)+sizeof(Scalar))+(bm+1)*sizeof(int); \ 150 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 151 new_j = (int*)(new_a + new_nz); \ 152 new_i = new_j + new_nz; \ 153 \ 154 /* copy over old data into new slots */ \ 155 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 156 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 157 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 158 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 159 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 160 len*sizeof(int));CHKERRQ(ierr); \ 161 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \ 162 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 163 len*sizeof(Scalar));CHKERRQ(ierr); \ 164 /* free up old matrix storage */ \ 165 \ 166 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 167 if (!b->singlemalloc) { \ 168 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 169 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 170 } \ 171 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 172 b->singlemalloc = PETSC_TRUE; \ 173 \ 174 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 175 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 176 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 177 b->maxnz += CHUNKSIZE; \ 178 b->reallocs++; \ 179 } \ 180 N = nrow++ - 1; b->nz++; \ 181 /* shift up all the later entries in this row */ \ 182 for (ii=N; ii>=_i; ii--) { \ 183 rp[ii+1] = rp[ii]; \ 184 ap[ii+1] = ap[ii]; \ 185 } \ 186 rp[_i] = col1; \ 187 ap[_i] = value; \ 188 b_noinsert: ; \ 189 bilen[row] = nrow; \ 190 } 191 192 #undef __FUNCT__ 193 #define __FUNCT__ "MatSetValues_MPIAIJ" 194 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 195 { 196 
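  /*
     Entries for locally owned rows are inserted directly: columns in
     [cstart,cend) go into the "diagonal" sequential block aij->A, all other
     columns into the "off-diagonal" block aij->B (after a first assembly the
     global column index is translated through colmap to the compressed local
     numbering).  Rows owned by other processes are stashed and communicated
     during MatAssemblyBegin/End.

     A minimal caller-side sketch (illustrative only; the row, column, and
     value are hypothetical):

        int    row = 0, col = 0;
        Scalar one = 1.0;
        ierr = MatSetValues(mat,1,&row,1,&col,&one,ADD_VALUES);CHKERRQ(ierr);
        ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
        ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  */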
Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 197 Scalar value; 198 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 199 int cstart = aij->cstart,cend = aij->cend,row,col; 200 PetscTruth roworiented = aij->roworiented; 201 202 /* Some Variables required in the macro */ 203 Mat A = aij->A; 204 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 205 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 206 Scalar *aa = a->a; 207 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 208 Mat B = aij->B; 209 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 210 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 211 Scalar *ba = b->a; 212 213 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 214 int nonew = a->nonew,shift = a->indexshift; 215 Scalar *ap; 216 217 PetscFunctionBegin; 218 for (i=0; i<m; i++) { 219 if (im[i] < 0) continue; 220 #if defined(PETSC_USE_BOPT_g) 221 if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 222 #endif 223 if (im[i] >= rstart && im[i] < rend) { 224 row = im[i] - rstart; 225 for (j=0; j<n; j++) { 226 if (in[j] >= cstart && in[j] < cend){ 227 col = in[j] - cstart; 228 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 229 if (ignorezeroentries && value == 0.0) continue; 230 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 231 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 232 } else if (in[j] < 0) continue; 233 #if defined(PETSC_USE_BOPT_g) 234 else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");} 235 #endif 236 else { 237 if (mat->was_assembled) { 238 if (!aij->colmap) { 239 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 240 } 241 #if defined (PETSC_USE_CTABLE) 242 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 243 col--; 244 #else 245 col = aij->colmap[in[j]] - 1; 246 #endif 247 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 248 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 249 col = in[j]; 250 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 251 B = aij->B; 252 b = (Mat_SeqAIJ*)B->data; 253 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 254 ba = b->a; 255 } 256 } else col = in[j]; 257 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 258 if (ignorezeroentries && value == 0.0) continue; 259 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 260 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 261 } 262 } 263 } else { 264 if (!aij->donotstash) { 265 if (roworiented) { 266 if (ignorezeroentries && v[i*n] == 0.0) continue; 267 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 268 } else { 269 if (ignorezeroentries && v[i] == 0.0) continue; 270 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 271 } 272 } 273 } 274 } 275 PetscFunctionReturn(0); 276 } 277 278 #undef __FUNCT__ 279 #define __FUNCT__ "MatGetValues_MPIAIJ" 280 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 281 { 282 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 283 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 284 int cstart = aij->cstart,cend = aij->cend,row,col; 285 286 PetscFunctionBegin; 287 for (i=0; i<m; i++) { 288 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 289 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 290 if (idxm[i] >= rstart && idxm[i] < rend) { 291 row = idxm[i] - rstart; 292 for (j=0; j<n; j++) { 
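      /* Columns inside [cstart,cend) are read from the diagonal block aij->A;
         any other column is translated through colmap to its compressed index
         in the off-diagonal block aij->B, and a column with no entry there
         simply returns 0.0. */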
      if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
      if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
      if (idxn[j] >= cstart && idxn[j] < cend){
        col = idxn[j] - cstart;
        ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
      } else {
        if (!aij->colmap) {
          ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
        }
#if defined (PETSC_USE_CTABLE)
        ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
        col--;
#else
        col = aij->colmap[idxn[j]] - 1;
#endif
        if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
        else {
          ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        }
      }
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
  }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr,nstash,reallocs;
  InsertMode addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        i,j,rstart,ncols,n,ierr,flg;
  int        *row,*col,other_disassembled;
  Scalar     *val;
  InsertMode addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble.
*/ 382 /* 383 if nonzero structure of submatrix B cannot change then we know that 384 no processor disassembled thus we can skip this stuff 385 */ 386 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 387 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 388 if (mat->was_assembled && !other_disassembled) { 389 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 390 } 391 } 392 393 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 394 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 395 } 396 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 397 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 398 399 if (aij->rowvalues) { 400 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 401 aij->rowvalues = 0; 402 } 403 PetscFunctionReturn(0); 404 } 405 406 #undef __FUNCT__ 407 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 408 int MatZeroEntries_MPIAIJ(Mat A) 409 { 410 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 411 int ierr; 412 413 PetscFunctionBegin; 414 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 415 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 416 PetscFunctionReturn(0); 417 } 418 419 #undef __FUNCT__ 420 #define __FUNCT__ "MatZeroRows_MPIAIJ" 421 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 422 { 423 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 424 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 425 int *procs,*nprocs,j,idx,nsends,*work,row; 426 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 427 int *rvalues,tag = A->tag,count,base,slen,n,*source; 428 int *lens,imdex,*lrows,*values,rstart=l->rstart; 429 MPI_Comm comm = A->comm; 430 MPI_Request *send_waits,*recv_waits; 431 MPI_Status recv_status,*send_status; 432 IS istmp; 433 PetscTruth found; 434 435 PetscFunctionBegin; 436 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 437 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 438 439 /* first count number of contributors to each processor */ 440 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 441 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 442 procs = nprocs + size; 443 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 444 for (i=0; i<N; i++) { 445 idx = rows[i]; 446 found = PETSC_FALSE; 447 for (j=0; j<size; j++) { 448 if (idx >= owners[j] && idx < owners[j+1]) { 449 nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break; 450 } 451 } 452 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 453 } 454 nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];} 455 456 /* inform other processors of number of messages and max length*/ 457 ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr); 458 ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 459 nrecvs = work[size+rank]; 460 nmax = work[rank]; 461 ierr = PetscFree(work);CHKERRQ(ierr); 462 463 /* post receives: */ 464 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 465 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 466 for (i=0; i<nrecvs; i++) { 467 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 468 } 469 470 /* do sends: 471 1) starts[i] gives the starting index in svalues for stuff going to 472 the ith processor 473 */ 474 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 475 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 476 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 477 starts[0] = 0; 478 for (i=1; i<size; 
i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for (i=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory, triggering lots of mallocs and
     slowing things down.
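     Concretely: the off-diagonal block l->B is always zeroed first; then, if a
     diagonal value is requested and the diagonal block is square, the diagonal
     is set by the sequential MatZeroRows() on l->A; otherwise (rectangular
     case) l->A is zeroed and the diagonal entries are inserted one row at a
     time with MatSetValues(), followed by a final assembly.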
534 535 Contributed by: Mathew Knepley 536 */ 537 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 538 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 539 if (diag && (l->A->M == l->A->N)) { 540 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 541 } else if (diag) { 542 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 543 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 544 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 545 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 546 } 547 for (i = 0; i < slen; i++) { 548 row = lrows[i] + rstart; 549 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 550 } 551 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 552 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 553 } else { 554 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 555 } 556 ierr = ISDestroy(istmp);CHKERRQ(ierr); 557 ierr = PetscFree(lrows);CHKERRQ(ierr); 558 559 /* wait on sends */ 560 if (nsends) { 561 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 562 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 563 ierr = PetscFree(send_status);CHKERRQ(ierr); 564 } 565 ierr = PetscFree(send_waits);CHKERRQ(ierr); 566 ierr = PetscFree(svalues);CHKERRQ(ierr); 567 568 PetscFunctionReturn(0); 569 } 570 571 #undef __FUNCT__ 572 #define __FUNCT__ "MatMult_MPIAIJ" 573 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 574 { 575 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 576 int ierr,nt; 577 578 PetscFunctionBegin; 579 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 580 if (nt != A->n) { 581 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt); 582 } 583 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 584 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 585 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 586 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 587 PetscFunctionReturn(0); 588 } 589 590 #undef __FUNCT__ 591 #define __FUNCT__ "MatMultAdd_MPIAIJ" 592 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 593 { 594 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 595 int ierr; 596 597 PetscFunctionBegin; 598 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 599 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 600 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 601 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 602 PetscFunctionReturn(0); 603 } 604 605 #undef __FUNCT__ 606 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 607 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 608 { 609 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 610 int ierr; 611 612 PetscFunctionBegin; 613 /* do nondiagonal part */ 614 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 615 /* send it on its way */ 616 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 617 /* do local part */ 618 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 619 /* receive remote parts: note this assumes the values are not actually */ 620 /* inserted in yy until the next line, which is true for my implementation*/ 621 /* but is not perhaps always true. 
*/ 622 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 623 PetscFunctionReturn(0); 624 } 625 626 #undef __FUNCT__ 627 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 628 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 629 { 630 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 631 int ierr; 632 633 PetscFunctionBegin; 634 /* do nondiagonal part */ 635 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 636 /* send it on its way */ 637 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 638 /* do local part */ 639 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 640 /* receive remote parts: note this assumes the values are not actually */ 641 /* inserted in yy until the next line, which is true for my implementation*/ 642 /* but is not perhaps always true. */ 643 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 644 PetscFunctionReturn(0); 645 } 646 647 /* 648 This only works correctly for square matrices where the subblock A->A is the 649 diagonal block 650 */ 651 #undef __FUNCT__ 652 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 653 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 654 { 655 int ierr; 656 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 657 658 PetscFunctionBegin; 659 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 660 if (a->rstart != a->cstart || a->rend != a->cend) { 661 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 662 } 663 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 664 PetscFunctionReturn(0); 665 } 666 667 #undef __FUNCT__ 668 #define __FUNCT__ "MatScale_MPIAIJ" 669 int MatScale_MPIAIJ(Scalar *aa,Mat A) 670 { 671 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 672 int ierr; 673 674 PetscFunctionBegin; 675 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 676 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 677 PetscFunctionReturn(0); 678 } 679 680 #undef __FUNCT__ 681 #define __FUNCT__ "MatDestroy_MPIAIJ" 682 int MatDestroy_MPIAIJ(Mat mat) 683 { 684 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 685 int ierr; 686 687 PetscFunctionBegin; 688 #if defined(PETSC_USE_LOG) 689 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 690 #endif 691 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 692 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 693 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 694 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 695 #if defined (PETSC_USE_CTABLE) 696 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 697 #else 698 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 699 #endif 700 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 701 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 702 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 703 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 704 ierr = PetscFree(aij);CHKERRQ(ierr); 705 PetscFunctionReturn(0); 706 } 707 708 #undef __FUNCT__ 709 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 710 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 711 { 712 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 713 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 714 int ierr,shift = C->indexshift,rank = aij->rank,size = aij->size; 715 PetscTruth isdraw,isascii,flg; 716 PetscViewer sviewer; 717 PetscViewerFormat format; 718 719 PetscFunctionBegin; 720 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 
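  /* For the ASCII "info" formats only per-process nonzero statistics are
     printed and we return early; for a full view with any supported viewer
     the matrix is gathered onto the first process below (unless running on
     a single process). */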
721 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 722 if (isascii) { 723 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 724 if (format == PETSC_VIEWER_ASCII_INFO_LONG) { 725 MatInfo info; 726 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 727 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 728 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 729 if (flg) { 730 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 731 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 732 } else { 733 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 734 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 735 } 736 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 737 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 738 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 739 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 740 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 741 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 742 PetscFunctionReturn(0); 743 } else if (format == PETSC_VIEWER_ASCII_INFO) { 744 PetscFunctionReturn(0); 745 } 746 } else if (isdraw) { 747 PetscDraw draw; 748 PetscTruth isnull; 749 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 750 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 751 } 752 753 if (size == 1) { 754 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 755 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 756 } else { 757 /* assemble the entire matrix onto first processor. 
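       Rank 0 creates a new M-by-N MPIAIJ matrix that it owns entirely (all
       other ranks contribute zero local rows); every rank then copies its
       diagonal and off-diagonal parts into it with MatSetValues(), mapping
       off-diagonal column indices back to global numbering through garray,
       and after assembly only rank 0 views the gathered block through the
       singleton viewer.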
    */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    int        M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    Scalar     *a;

    if (!rank) {
      ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
    ct = cols;
    for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (isascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

/*
    This has to provide several versions.

     2) a) use only local smoothing, updating outer values only once.
        b) local smoothing, updating outer values each inner iteration
     3) color, updating outer values between colors.
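    Only the SOR_LOCAL_* variants are implemented below; they relax with the
    local diagonal block while treating the off-process coupling (held in the
    scattered work vector lvec) as fixed within each sweep.

    A caller-side sketch, assuming the PETSc 2.1-era MatRelax() interface and
    hypothetical vectors b and x:

       ierr = MatRelax(mat,b,1.0,SOR_LOCAL_SYMMETRIC_SWEEP,0.0,1,x);CHKERRQ(ierr);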
837 */ 838 #undef __FUNCT__ 839 #define __FUNCT__ "MatRelax_MPIAIJ" 840 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,Vec xx) 841 { 842 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 843 Mat AA = mat->A,BB = mat->B; 844 Mat_SeqAIJ *A = (Mat_SeqAIJ*)AA->data,*B = (Mat_SeqAIJ *)BB->data; 845 Scalar *b,*x,*xs,*ls,d,*v,sum; 846 int ierr,*idx,*diag; 847 int n = matin->n,m = matin->m,i,shift = A->indexshift; 848 849 PetscFunctionBegin; 850 if (!A->diag) {ierr = MatMarkDiagonal_SeqAIJ(AA);CHKERRQ(ierr);} 851 diag = A->diag; 852 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 853 if (flag & SOR_ZERO_INITIAL_GUESS) { 854 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 855 PetscFunctionReturn(0); 856 } 857 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 858 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 859 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 860 if (xx != bb) { 861 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 862 } else { 863 b = x; 864 } 865 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 866 xs = x + shift; /* shift by one for index start of 1 */ 867 ls = ls + shift; 868 while (its--) { 869 /* go down through the rows */ 870 for (i=0; i<m; i++) { 871 n = A->i[i+1] - A->i[i]; 872 PetscLogFlops(4*n+3); 873 idx = A->j + A->i[i] + shift; 874 v = A->a + A->i[i] + shift; 875 sum = b[i]; 876 SPARSEDENSEMDOT(sum,xs,v,idx,n); 877 d = fshift + A->a[diag[i]+shift]; 878 n = B->i[i+1] - B->i[i]; 879 idx = B->j + B->i[i] + shift; 880 v = B->a + B->i[i] + shift; 881 SPARSEDENSEMDOT(sum,ls,v,idx,n); 882 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 883 } 884 /* come up through the rows */ 885 for (i=m-1; i>-1; i--) { 886 n = A->i[i+1] - A->i[i]; 887 PetscLogFlops(4*n+3); 888 idx = A->j + A->i[i] + shift; 889 v = A->a + A->i[i] + shift; 890 sum = b[i]; 891 SPARSEDENSEMDOT(sum,xs,v,idx,n); 892 d = fshift + A->a[diag[i]+shift]; 893 n = B->i[i+1] - B->i[i]; 894 idx = B->j + B->i[i] + shift; 895 v = B->a + B->i[i] + shift; 896 SPARSEDENSEMDOT(sum,ls,v,idx,n); 897 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 898 } 899 } 900 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 901 if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); } 902 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 903 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 904 if (flag & SOR_ZERO_INITIAL_GUESS) { 905 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 906 PetscFunctionReturn(0); 907 } 908 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 909 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 910 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 911 if (xx != bb) { 912 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 913 } else { 914 b = x; 915 } 916 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 917 xs = x + shift; /* shift by one for index start of 1 */ 918 ls = ls + shift; 919 while (its--) { 920 for (i=0; i<m; i++) { 921 n = A->i[i+1] - A->i[i]; 922 PetscLogFlops(4*n+3); 923 idx = A->j + A->i[i] + shift; 924 v = A->a + A->i[i] + shift; 925 sum = b[i]; 926 SPARSEDENSEMDOT(sum,xs,v,idx,n); 927 d = fshift + A->a[diag[i]+shift]; 928 n = B->i[i+1] - B->i[i]; 929 idx = B->j + B->i[i] + shift; 930 v = B->a + B->i[i] + shift; 931 SPARSEDENSEMDOT(sum,ls,v,idx,n); 932 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 933 } 934 } 935 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 936 if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); } 937 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 938 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 939 if (flag & SOR_ZERO_INITIAL_GUESS) { 940 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 941 PetscFunctionReturn(0); 942 } 943 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 944 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 945 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 946 if (xx != bb) { 947 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 948 } else { 949 b = x; 950 } 951 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 952 xs = x + shift; /* shift by one for index start of 1 */ 953 ls = ls + shift; 954 while (its--) { 955 for (i=m-1; i>-1; i--) { 956 n = A->i[i+1] - A->i[i]; 957 PetscLogFlops(4*n+3); 958 idx = A->j + A->i[i] + shift; 959 v = A->a + A->i[i] + shift; 960 sum = b[i]; 961 SPARSEDENSEMDOT(sum,xs,v,idx,n); 962 d = fshift + A->a[diag[i]+shift]; 963 n = B->i[i+1] - B->i[i]; 964 idx = B->j + B->i[i] + shift; 965 v = B->a + B->i[i] + shift; 966 SPARSEDENSEMDOT(sum,ls,v,idx,n); 967 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 968 } 969 } 970 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 971 if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); } 972 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 973 } else { 974 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 975 } 976 PetscFunctionReturn(0); 977 } 978 979 #undef __FUNCT__ 980 #define __FUNCT__ "MatGetInfo_MPIAIJ" 981 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 982 { 983 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 984 Mat A = mat->A,B = mat->B; 985 int ierr; 986 PetscReal isend[5],irecv[5]; 987 988 PetscFunctionBegin; 989 info->block_size = 1.0; 990 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 991 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 992 isend[3] = info->memory; isend[4] = info->mallocs; 993 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 994 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 995 isend[3] += info->memory; isend[4] += info->mallocs; 996 if (flag == MAT_LOCAL) { 997 info->nz_used = isend[0]; 998 info->nz_allocated = isend[1]; 999 info->nz_unneeded = isend[2]; 1000 info->memory = isend[3]; 1001 info->mallocs = isend[4]; 1002 } else if (flag == MAT_GLOBAL_MAX) { 1003 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1004 info->nz_used = irecv[0]; 1005 info->nz_allocated = irecv[1]; 1006 info->nz_unneeded = irecv[2]; 1007 info->memory = irecv[3]; 1008 info->mallocs = irecv[4]; 1009 } else if (flag == MAT_GLOBAL_SUM) { 1010 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1011 info->nz_used = irecv[0]; 1012 info->nz_allocated = irecv[1]; 1013 info->nz_unneeded = irecv[2]; 1014 info->memory = irecv[3]; 1015 info->mallocs = irecv[4]; 1016 } 1017 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1018 info->fill_ratio_needed = 0; 1019 info->factor_mallocs = 0; 1020 info->rows_global = (double)matin->M; 1021 info->columns_global = (double)matin->N; 1022 info->rows_local = (double)matin->m; 1023 info->columns_local = (double)matin->N; 1024 1025 PetscFunctionReturn(0); 1026 } 1027 1028 #undef 
__FUNCT__ 1029 #define __FUNCT__ "MatSetOption_MPIAIJ" 1030 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1031 { 1032 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1033 int ierr; 1034 1035 PetscFunctionBegin; 1036 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1037 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1038 op == MAT_COLUMNS_UNSORTED || 1039 op == MAT_COLUMNS_SORTED || 1040 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1041 op == MAT_KEEP_ZEROED_ROWS || 1042 op == MAT_NEW_NONZERO_LOCATION_ERR || 1043 op == MAT_USE_INODES || 1044 op == MAT_DO_NOT_USE_INODES || 1045 op == MAT_IGNORE_ZERO_ENTRIES) { 1046 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1047 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1048 } else if (op == MAT_ROW_ORIENTED) { 1049 a->roworiented = PETSC_TRUE; 1050 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1051 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1052 } else if (op == MAT_ROWS_SORTED || 1053 op == MAT_ROWS_UNSORTED || 1054 op == MAT_YES_NEW_DIAGONALS) { 1055 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1056 } else if (op == MAT_COLUMN_ORIENTED) { 1057 a->roworiented = PETSC_FALSE; 1058 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1059 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1060 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1061 a->donotstash = PETSC_TRUE; 1062 } else if (op == MAT_NO_NEW_DIAGONALS){ 1063 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1064 } else { 1065 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1066 } 1067 PetscFunctionReturn(0); 1068 } 1069 1070 #undef __FUNCT__ 1071 #define __FUNCT__ "MatGetOwnershipRange_MPIAIJ" 1072 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1073 { 1074 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1075 1076 PetscFunctionBegin; 1077 if (m) *m = mat->rstart; 1078 if (n) *n = mat->rend; 1079 PetscFunctionReturn(0); 1080 } 1081 1082 #undef __FUNCT__ 1083 #define __FUNCT__ "MatGetRow_MPIAIJ" 1084 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1085 { 1086 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1087 Scalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1088 int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1089 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1090 int *cmap,*idx_p; 1091 1092 PetscFunctionBegin; 1093 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1094 mat->getrowactive = PETSC_TRUE; 1095 1096 if (!mat->rowvalues && (idx || v)) { 1097 /* 1098 allocate enough space to hold information from the longest row. 
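       The workspace is sized by the widest local row (diagonal plus
       off-diagonal parts together), is reused by every subsequent
       MatGetRow() call, and is released again in MatAssemblyEnd_MPIAIJ().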
1099 */ 1100 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1101 int max = 1,tmp; 1102 for (i=0; i<matin->m; i++) { 1103 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1104 if (max < tmp) { max = tmp; } 1105 } 1106 ierr = PetscMalloc(max*(sizeof(int)+sizeof(Scalar)),&mat->rowvalues);CHKERRQ(ierr); 1107 mat->rowindices = (int*)(mat->rowvalues + max); 1108 } 1109 1110 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1111 lrow = row - rstart; 1112 1113 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1114 if (!v) {pvA = 0; pvB = 0;} 1115 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1116 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1117 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1118 nztot = nzA + nzB; 1119 1120 cmap = mat->garray; 1121 if (v || idx) { 1122 if (nztot) { 1123 /* Sort by increasing column numbers, assuming A and B already sorted */ 1124 int imark = -1; 1125 if (v) { 1126 *v = v_p = mat->rowvalues; 1127 for (i=0; i<nzB; i++) { 1128 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1129 else break; 1130 } 1131 imark = i; 1132 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1133 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1134 } 1135 if (idx) { 1136 *idx = idx_p = mat->rowindices; 1137 if (imark > -1) { 1138 for (i=0; i<imark; i++) { 1139 idx_p[i] = cmap[cworkB[i]]; 1140 } 1141 } else { 1142 for (i=0; i<nzB; i++) { 1143 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1144 else break; 1145 } 1146 imark = i; 1147 } 1148 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1149 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1150 } 1151 } else { 1152 if (idx) *idx = 0; 1153 if (v) *v = 0; 1154 } 1155 } 1156 *nz = nztot; 1157 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1158 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1159 PetscFunctionReturn(0); 1160 } 1161 1162 #undef __FUNCT__ 1163 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1164 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1165 { 1166 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1167 1168 PetscFunctionBegin; 1169 if (aij->getrowactive == PETSC_FALSE) { 1170 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1171 } 1172 aij->getrowactive = PETSC_FALSE; 1173 PetscFunctionReturn(0); 1174 } 1175 1176 #undef __FUNCT__ 1177 #define __FUNCT__ "MatNorm_MPIAIJ" 1178 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1179 { 1180 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1181 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1182 int ierr,i,j,cstart = aij->cstart,shift = amat->indexshift; 1183 PetscReal sum = 0.0; 1184 Scalar *v; 1185 1186 PetscFunctionBegin; 1187 if (aij->size == 1) { 1188 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1189 } else { 1190 if (type == NORM_FROBENIUS) { 1191 v = amat->a; 1192 for (i=0; i<amat->nz; i++) { 1193 #if defined(PETSC_USE_COMPLEX) 1194 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1195 #else 1196 sum += (*v)*(*v); v++; 1197 #endif 1198 } 1199 v = bmat->a; 1200 for (i=0; i<bmat->nz; i++) { 1201 #if defined(PETSC_USE_COMPLEX) 1202 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1203 #else 1204 sum += (*v)*(*v); v++; 1205 #endif 1206 } 1207 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1208 *norm = sqrt(*norm); 1209 } else if (type == NORM_1) { /* max column norm */ 1210 PetscReal *tmp,*tmp2; 
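      /* The 1-norm is the maximum column sum: each process accumulates
         |a_ij| per global column (off-diagonal columns are mapped through
         garray), the per-column sums are combined with MPI_Allreduce, and
         the largest entry is taken. */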
1211 int *jj,*garray = aij->garray; 1212 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1213 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1214 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1215 *norm = 0.0; 1216 v = amat->a; jj = amat->j; 1217 for (j=0; j<amat->nz; j++) { 1218 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1219 } 1220 v = bmat->a; jj = bmat->j; 1221 for (j=0; j<bmat->nz; j++) { 1222 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1223 } 1224 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1225 for (j=0; j<mat->N; j++) { 1226 if (tmp2[j] > *norm) *norm = tmp2[j]; 1227 } 1228 ierr = PetscFree(tmp);CHKERRQ(ierr); 1229 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1230 } else if (type == NORM_INFINITY) { /* max row norm */ 1231 PetscReal ntemp = 0.0; 1232 for (j=0; j<aij->A->m; j++) { 1233 v = amat->a + amat->i[j] + shift; 1234 sum = 0.0; 1235 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1236 sum += PetscAbsScalar(*v); v++; 1237 } 1238 v = bmat->a + bmat->i[j] + shift; 1239 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1240 sum += PetscAbsScalar(*v); v++; 1241 } 1242 if (sum > ntemp) ntemp = sum; 1243 } 1244 ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr); 1245 } else { 1246 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1247 } 1248 } 1249 PetscFunctionReturn(0); 1250 } 1251 1252 #undef __FUNCT__ 1253 #define __FUNCT__ "MatTranspose_MPIAIJ" 1254 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1255 { 1256 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1257 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1258 int ierr,shift = Aloc->indexshift; 1259 int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1260 Mat B; 1261 Scalar *array; 1262 1263 PetscFunctionBegin; 1264 if (!matout && M != N) { 1265 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1266 } 1267 1268 ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1269 1270 /* copy over the A part */ 1271 Aloc = (Mat_SeqAIJ*)a->A->data; 1272 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1273 row = a->rstart; 1274 for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;} 1275 for (i=0; i<m; i++) { 1276 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1277 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1278 } 1279 aj = Aloc->j; 1280 for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;} 1281 1282 /* copy over the B part */ 1283 Aloc = (Mat_SeqAIJ*)a->B->data; 1284 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1285 row = a->rstart; 1286 ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr); 1287 ct = cols; 1288 for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];} 1289 for (i=0; i<m; i++) { 1290 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1291 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1292 } 1293 ierr = PetscFree(ct);CHKERRQ(ierr); 1294 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1295 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1296 if (matout) { 1297 *matout = B; 1298 } else { 1299 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1300 } 1301 PetscFunctionReturn(0); 1302 } 1303 1304 #undef __FUNCT__ 1305 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1306 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1307 { 1308 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1309 Mat a = aij->A,b = aij->B; 1310 int 
ierr,s1,s2,s3; 1311 1312 PetscFunctionBegin; 1313 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1314 if (rr) { 1315 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1316 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1317 /* Overlap communication with computation. */ 1318 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1319 } 1320 if (ll) { 1321 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1322 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1323 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1324 } 1325 /* scale the diagonal block */ 1326 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1327 1328 if (rr) { 1329 /* Do a scatter end and then right scale the off-diagonal block */ 1330 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1331 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1332 } 1333 1334 PetscFunctionReturn(0); 1335 } 1336 1337 1338 #undef __FUNCT__ 1339 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1340 int MatPrintHelp_MPIAIJ(Mat A) 1341 { 1342 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1343 int ierr; 1344 1345 PetscFunctionBegin; 1346 if (!a->rank) { 1347 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1348 } 1349 PetscFunctionReturn(0); 1350 } 1351 1352 #undef __FUNCT__ 1353 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1354 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1355 { 1356 PetscFunctionBegin; 1357 *bs = 1; 1358 PetscFunctionReturn(0); 1359 } 1360 #undef __FUNCT__ 1361 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1362 int MatSetUnfactored_MPIAIJ(Mat A) 1363 { 1364 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1365 int ierr; 1366 1367 PetscFunctionBegin; 1368 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1369 PetscFunctionReturn(0); 1370 } 1371 1372 #undef __FUNCT__ 1373 #define __FUNCT__ "MatEqual_MPIAIJ" 1374 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1375 { 1376 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1377 Mat a,b,c,d; 1378 PetscTruth flg; 1379 int ierr; 1380 1381 PetscFunctionBegin; 1382 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1383 if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type"); 1384 a = matA->A; b = matA->B; 1385 c = matB->A; d = matB->B; 1386 1387 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1388 if (flg == PETSC_TRUE) { 1389 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1390 } 1391 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1392 PetscFunctionReturn(0); 1393 } 1394 1395 #undef __FUNCT__ 1396 #define __FUNCT__ "MatCopy_MPIAIJ" 1397 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1398 { 1399 int ierr; 1400 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1401 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1402 PetscTruth flg; 1403 1404 PetscFunctionBegin; 1405 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1406 if (str != SAME_NONZERO_PATTERN || !flg) { 1407 /* because of the column compression in the off-processor part of the matrix a->B, 1408 the number of columns in a->B and b->B may be different, hence we cannot call 1409 the MatCopy() directly on the two parts. 
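       (MatCopy_Basic() copies entry by entry through the generic interface,
       so it works for any combination of nonzero patterns, just more slowly.)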
If need be, we can provide a more 1410 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1411 then copying the submatrices */ 1412 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1413 } else { 1414 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1415 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1416 } 1417 PetscFunctionReturn(0); 1418 } 1419 1420 #undef __FUNCT__ 1421 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1422 int MatSetUpPreallocation_MPIAIJ(Mat A) 1423 { 1424 int ierr; 1425 1426 PetscFunctionBegin; 1427 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1428 PetscFunctionReturn(0); 1429 } 1430 1431 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1432 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int); 1433 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1434 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **); 1435 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *); 1436 #if !defined(PETSC_USE_COMPLEX) 1437 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*); 1438 #endif 1439 1440 /* -------------------------------------------------------------------*/ 1441 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1442 MatGetRow_MPIAIJ, 1443 MatRestoreRow_MPIAIJ, 1444 MatMult_MPIAIJ, 1445 MatMultAdd_MPIAIJ, 1446 MatMultTranspose_MPIAIJ, 1447 MatMultTransposeAdd_MPIAIJ, 1448 0, 1449 0, 1450 0, 1451 0, 1452 0, 1453 0, 1454 MatRelax_MPIAIJ, 1455 MatTranspose_MPIAIJ, 1456 MatGetInfo_MPIAIJ, 1457 MatEqual_MPIAIJ, 1458 MatGetDiagonal_MPIAIJ, 1459 MatDiagonalScale_MPIAIJ, 1460 MatNorm_MPIAIJ, 1461 MatAssemblyBegin_MPIAIJ, 1462 MatAssemblyEnd_MPIAIJ, 1463 0, 1464 MatSetOption_MPIAIJ, 1465 MatZeroEntries_MPIAIJ, 1466 MatZeroRows_MPIAIJ, 1467 #if !defined(PETSC_USE_COMPLEX) 1468 MatLUFactorSymbolic_MPIAIJ_TFS, 1469 #else 1470 0, 1471 #endif 1472 0, 1473 0, 1474 0, 1475 MatSetUpPreallocation_MPIAIJ, 1476 0, 1477 MatGetOwnershipRange_MPIAIJ, 1478 0, 1479 0, 1480 0, 1481 0, 1482 MatDuplicate_MPIAIJ, 1483 0, 1484 0, 1485 0, 1486 0, 1487 0, 1488 MatGetSubMatrices_MPIAIJ, 1489 MatIncreaseOverlap_MPIAIJ, 1490 MatGetValues_MPIAIJ, 1491 MatCopy_MPIAIJ, 1492 MatPrintHelp_MPIAIJ, 1493 MatScale_MPIAIJ, 1494 0, 1495 0, 1496 0, 1497 MatGetBlockSize_MPIAIJ, 1498 0, 1499 0, 1500 0, 1501 0, 1502 MatFDColoringCreate_MPIAIJ, 1503 0, 1504 MatSetUnfactored_MPIAIJ, 1505 0, 1506 0, 1507 MatGetSubMatrix_MPIAIJ, 1508 MatDestroy_MPIAIJ, 1509 MatView_MPIAIJ, 1510 MatGetMaps_Petsc, 1511 0, 1512 0, 1513 0, 1514 0, 1515 0, 1516 0, 1517 0, 1518 0, 1519 MatSetColoring_MPIAIJ, 1520 MatSetValuesAdic_MPIAIJ, 1521 MatSetValuesAdifor_MPIAIJ 1522 }; 1523 1524 /* ----------------------------------------------------------------------------------------*/ 1525 1526 EXTERN_C_BEGIN 1527 #undef __FUNCT__ 1528 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1529 int MatStoreValues_MPIAIJ(Mat mat) 1530 { 1531 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1532 int ierr; 1533 1534 PetscFunctionBegin; 1535 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1536 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1537 PetscFunctionReturn(0); 1538 } 1539 EXTERN_C_END 1540 1541 EXTERN_C_BEGIN 1542 #undef __FUNCT__ 1543 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1544 int MatRetrieveValues_MPIAIJ(Mat mat) 1545 { 1546 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1547 int ierr; 1548 1549 PetscFunctionBegin; 1550 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1551 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1552 
PetscFunctionReturn(0); 1553 } 1554 EXTERN_C_END 1555 1556 #include "petscpc.h" 1557 EXTERN_C_BEGIN 1558 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *); 1559 EXTERN_C_END 1560 1561 EXTERN_C_BEGIN 1562 #undef __FUNCT__ 1563 #define __FUNCT__ "MatCreate_MPIAIJ" 1564 int MatCreate_MPIAIJ(Mat B) 1565 { 1566 Mat_MPIAIJ *b; 1567 int ierr,i,size; 1568 1569 PetscFunctionBegin; 1570 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1571 1572 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1573 B->data = (void*)b; 1574 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1575 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1576 B->factor = 0; 1577 B->assembled = PETSC_FALSE; 1578 B->mapping = 0; 1579 1580 B->insertmode = NOT_SET_VALUES; 1581 b->size = size; 1582 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1583 1584 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1585 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1586 1587 /* the information in the maps duplicates the information computed below, eventually 1588 we should remove the duplicate information that is not contained in the maps */ 1589 ierr = MapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1590 ierr = MapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1591 1592 /* build local table of row and column ownerships */ 1593 ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1594 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1595 b->cowners = b->rowners + b->size + 2; 1596 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1597 b->rowners[0] = 0; 1598 for (i=2; i<=b->size; i++) { 1599 b->rowners[i] += b->rowners[i-1]; 1600 } 1601 b->rstart = b->rowners[b->rank]; 1602 b->rend = b->rowners[b->rank+1]; 1603 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1604 b->cowners[0] = 0; 1605 for (i=2; i<=b->size; i++) { 1606 b->cowners[i] += b->cowners[i-1]; 1607 } 1608 b->cstart = b->cowners[b->rank]; 1609 b->cend = b->cowners[b->rank+1]; 1610 1611 /* build cache for off array entries formed */ 1612 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1613 b->donotstash = PETSC_FALSE; 1614 b->colmap = 0; 1615 b->garray = 0; 1616 b->roworiented = PETSC_TRUE; 1617 1618 /* stuff used for matrix vector multiply */ 1619 b->lvec = PETSC_NULL; 1620 b->Mvctx = PETSC_NULL; 1621 1622 /* stuff for MatGetRow() */ 1623 b->rowindices = 0; 1624 b->rowvalues = 0; 1625 b->getrowactive = PETSC_FALSE; 1626 1627 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1628 "MatStoreValues_MPIAIJ", 1629 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1630 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1631 "MatRetrieveValues_MPIAIJ", 1632 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1633 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1634 "MatGetDiagonalBlock_MPIAIJ", 1635 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1636 PetscFunctionReturn(0); 1637 } 1638 EXTERN_C_END 1639 1640 #undef __FUNCT__ 1641 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1642 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1643 { 1644 Mat mat; 1645 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1646 int ierr; 1647 1648 PetscFunctionBegin; 1649 *newmat = 0; 1650 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1651 
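  /* The duplicate gets a fresh header with the same parallel layout; the
     ownership ranges, colmap, and garray are copied below, and the sequential
     A and B blocks, the local work vector lvec, and the scatter context Mvctx
     are duplicated so the new matrix is immediately usable. */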
ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr); 1652 a = (Mat_MPIAIJ*)mat->data; 1653 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1654 mat->factor = matin->factor; 1655 mat->assembled = PETSC_TRUE; 1656 mat->insertmode = NOT_SET_VALUES; 1657 mat->preallocated = PETSC_TRUE; 1658 1659 a->rstart = oldmat->rstart; 1660 a->rend = oldmat->rend; 1661 a->cstart = oldmat->cstart; 1662 a->cend = oldmat->cend; 1663 a->size = oldmat->size; 1664 a->rank = oldmat->rank; 1665 a->donotstash = oldmat->donotstash; 1666 a->roworiented = oldmat->roworiented; 1667 a->rowindices = 0; 1668 a->rowvalues = 0; 1669 a->getrowactive = PETSC_FALSE; 1670 1671 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1672 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1673 if (oldmat->colmap) { 1674 #if defined (PETSC_USE_CTABLE) 1675 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1676 #else 1677 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1678 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1679 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1680 #endif 1681 } else a->colmap = 0; 1682 if (oldmat->garray) { 1683 int len; 1684 len = oldmat->B->n; 1685 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1686 PetscLogObjectMemory(mat,len*sizeof(int)); 1687 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1688 } else a->garray = 0; 1689 1690 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1691 PetscLogObjectParent(mat,a->lvec); 1692 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1693 PetscLogObjectParent(mat,a->Mvctx); 1694 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1695 PetscLogObjectParent(mat,a->A); 1696 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1697 PetscLogObjectParent(mat,a->B); 1698 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1699 *newmat = mat; 1700 PetscFunctionReturn(0); 1701 } 1702 1703 #include "petscsys.h" 1704 1705 EXTERN_C_BEGIN 1706 #undef __FUNCT__ 1707 #define __FUNCT__ "MatLoad_MPIAIJ" 1708 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat) 1709 { 1710 Mat A; 1711 Scalar *vals,*svals; 1712 MPI_Comm comm = ((PetscObject)viewer)->comm; 1713 MPI_Status status; 1714 int i,nz,ierr,j,rstart,rend,fd; 1715 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1716 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1717 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1718 1719 PetscFunctionBegin; 1720 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1721 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1722 if (!rank) { 1723 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1724 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1725 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1726 if (header[3] < 0) { 1727 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 1728 } 1729 } 1730 1731 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1732 M = header[1]; N = header[2]; 1733 /* determine ownership of all rows */ 1734 m = M/size + ((M % size) > rank); 1735 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1736 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1737 rowners[0] = 0; 1738 for (i=2; i<=size; 
i++) { 1739 rowners[i] += rowners[i-1]; 1740 } 1741 rstart = rowners[rank]; 1742 rend = rowners[rank+1]; 1743 1744 /* distribute row lengths to all processors */ 1745 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr); 1746 offlens = ourlens + (rend-rstart); 1747 if (!rank) { 1748 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 1749 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1750 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 1751 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1752 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1753 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1754 } else { 1755 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1756 } 1757 1758 if (!rank) { 1759 /* calculate the number of nonzeros on each processor */ 1760 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 1761 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 1762 for (i=0; i<size; i++) { 1763 for (j=rowners[i]; j< rowners[i+1]; j++) { 1764 procsnz[i] += rowlengths[j]; 1765 } 1766 } 1767 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1768 1769 /* determine max buffer needed and allocate it */ 1770 maxnz = 0; 1771 for (i=0; i<size; i++) { 1772 maxnz = PetscMax(maxnz,procsnz[i]); 1773 } 1774 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 1775 1776 /* read in my part of the matrix column indices */ 1777 nz = procsnz[0]; 1778 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 1779 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1780 1781 /* read in every one elses and ship off */ 1782 for (i=1; i<size; i++) { 1783 nz = procsnz[i]; 1784 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1785 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 1786 } 1787 ierr = PetscFree(cols);CHKERRQ(ierr); 1788 } else { 1789 /* determine buffer space needed for message */ 1790 nz = 0; 1791 for (i=0; i<m; i++) { 1792 nz += ourlens[i]; 1793 } 1794 ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr); 1795 1796 /* receive message of column indices*/ 1797 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 1798 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 1799 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1800 } 1801 1802 /* determine column ownership if matrix is not square */ 1803 if (N != M) { 1804 n = N/size + ((N % size) > rank); 1805 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1806 cstart = cend - n; 1807 } else { 1808 cstart = rstart; 1809 cend = rend; 1810 n = cend - cstart; 1811 } 1812 1813 /* loop over local rows, determining number of off diagonal entries */ 1814 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 1815 jj = 0; 1816 for (i=0; i<m; i++) { 1817 for (j=0; j<ourlens[i]; j++) { 1818 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 1819 jj++; 1820 } 1821 } 1822 1823 /* create our matrix */ 1824 for (i=0; i<m; i++) { 1825 ourlens[i] -= offlens[i]; 1826 } 1827 ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1828 A = *newmat; 1829 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 1830 for (i=0; i<m; i++) { 1831 ourlens[i] += offlens[i]; 1832 } 1833 1834 if (!rank) { 1835 ierr = PetscMalloc(maxnz*sizeof(Scalar),&vals);CHKERRQ(ierr); 1836 1837 /* read in my part of the matrix numerical values */ 1838 nz = 
procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' values and send them off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(Scalar),&vals);CHKERRQ(ierr);

    /* receive the message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix: first a local SeqAIJ
  submatrix on each process, and then the final result formed by concatenating the local matrices.
1895 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 1896 */ 1897 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 1898 { 1899 int ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j; 1900 int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend; 1901 Mat *local,M,Mreuse; 1902 Scalar *vwork,*aa; 1903 MPI_Comm comm = mat->comm; 1904 Mat_SeqAIJ *aij; 1905 1906 1907 PetscFunctionBegin; 1908 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1909 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1910 1911 if (call == MAT_REUSE_MATRIX) { 1912 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 1913 if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse"); 1914 local = &Mreuse; 1915 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 1916 } else { 1917 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 1918 Mreuse = *local; 1919 ierr = PetscFree(local);CHKERRQ(ierr); 1920 } 1921 1922 /* 1923 m - number of local rows 1924 n - number of columns (same on all processors) 1925 rstart - first row in new global matrix generated 1926 */ 1927 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 1928 if (call == MAT_INITIAL_MATRIX) { 1929 aij = (Mat_SeqAIJ*)(Mreuse)->data; 1930 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix"); 1931 ii = aij->i; 1932 jj = aij->j; 1933 1934 /* 1935 Determine the number of non-zeros in the diagonal and off-diagonal 1936 portions of the matrix in order to do correct preallocation 1937 */ 1938 1939 /* first get start and end of "diagonal" columns */ 1940 if (csize == PETSC_DECIDE) { 1941 nlocal = n/size + ((n % size) > rank); 1942 } else { 1943 nlocal = csize; 1944 } 1945 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1946 rstart = rend - nlocal; 1947 if (rank == size - 1 && rend != n) { 1948 SETERRQ(1,"Local column sizes do not add up to total number of columns"); 1949 } 1950 1951 /* next, compute all the lengths */ 1952 ierr = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr); 1953 olens = dlens + m; 1954 for (i=0; i<m; i++) { 1955 jend = ii[i+1] - ii[i]; 1956 olen = 0; 1957 dlen = 0; 1958 for (j=0; j<jend; j++) { 1959 if (*jj < rstart || *jj >= rend) olen++; 1960 else dlen++; 1961 jj++; 1962 } 1963 olens[i] = olen; 1964 dlens[i] = dlen; 1965 } 1966 ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 1967 ierr = PetscFree(dlens);CHKERRQ(ierr); 1968 } else { 1969 int ml,nl; 1970 1971 M = *newmat; 1972 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 1973 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 1974 ierr = MatZeroEntries(M);CHKERRQ(ierr); 1975 /* 1976 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 1977 rather than the slower MatSetValues(). 
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij = (Mat_SeqAIJ*)(Mreuse)->data;
  if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save the local submatrix so it can be reused on a later MAT_REUSE_MATRIX call */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates storage for a sparse parallel matrix in AIJ
   format (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  d_nz  - number of nonzeros per row in the DIAGONAL portion of the local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of the local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0, m1, m2, ... are the input parameter 'm' on
   each process.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor and r2 is the last row belonging
   to this processor.  This is a square m x m matrix.  The remaining portion of
   the local submatrix (m x N) constitutes the OFF-DIAGONAL portion.

   If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors.  Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows and proc2 owns 2 rows.  This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0, proc1, proc2 are 3, 3, 2 respectively.
   The 'n' parameters for proc0, proc1, proc2 are 3, 3, 2 respectively.
   The 'M','N' parameters are 8, 8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0, proc1, proc2 are the
   submatrices [A], [E], [I] respectively.  The OFF-DIAGONAL submatrices
   corresponding to proc0, proc1, proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When the scalar d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz, o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc.  This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz, o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.
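
   A minimal call sequence for the layout above (an illustrative sketch only, shown for
   proc0; error checking and the declaration of the communicator comm are omitted) is:

.vb
     Mat A;
     int d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};

     ierr = MatCreate(comm,3,3,8,8,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
   followed by the usual MatSetValues(), MatAssemblyBegin() and MatAssemblyEnd() calls.
   The d_nnz and o_nnz values are the proc0 entries listed above; since the arrays are
   given, the scalar d_nz and o_nz arguments (passed here as 0) are ignored.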

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
{
  Mat_MPIAIJ *b;
  int        ierr,i;
  PetscTruth flg2;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
  if (!flg2) PetscFunctionReturn(0);
  B->preallocated = PETSC_TRUE;
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
  if (d_nnz) {
    for (i=0; i<B->m; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->m; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
    }
  }
  b = (Mat_MPIAIJ*)B->data;

  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->A);
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->B);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given).
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given).
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.  For square matrices n is
       almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  d_nz  - number of nonzeros per row in the DIAGONAL portion of the local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of the local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.

   Output Parameter:
.  A - the matrix

   Notes:
   The m, n, M, N parameters specify the size of the matrix and its partitioning across
   processors, while the d_nz, d_nnz, o_nz, o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0, m1, m2, ... are the input parameter 'm' on
   each process.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor and r2 is the last row belonging
   to this processor.  This is a square m x m matrix.  The remaining portion of
   the local submatrix (m x N) constitutes the OFF-DIAGONAL portion.

   If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors.  Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows and proc2 owns 2 rows.  This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0, proc1, proc2 are 3, 3, 2 respectively.
   The 'n' parameters for proc0, proc1, proc2 are 3, 3, 2 respectively.
   The 'M','N' parameters are 8, 8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0, proc1, proc2 are the
   submatrices [A], [E], [I] respectively.  The OFF-DIAGONAL submatrices
   corresponding to proc0, proc1, proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When the scalar d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz, o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc.  This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz, o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_LOCAL) {
    int        *allcolors,*colors,i;
    ISColoring ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = 
ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2388 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2389 int *colors,i,*larray; 2390 ISColoring ocoloring; 2391 2392 /* set coloring for diagonal portion */ 2393 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2394 for (i=0; i<a->A->n; i++) { 2395 larray[i] = i + a->cstart; 2396 } 2397 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2398 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&colors);CHKERRQ(ierr); 2399 for (i=0; i<a->A->n; i++) { 2400 colors[i] = coloring->colors[larray[i]]; 2401 } 2402 ierr = PetscFree(larray);CHKERRQ(ierr); 2403 ierr = ISColoringCreate(MPI_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2404 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2405 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2406 2407 /* set coloring for off-diagonal portion */ 2408 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2409 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2410 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr); 2411 for (i=0; i<a->B->n; i++) { 2412 colors[i] = coloring->colors[larray[i]]; 2413 } 2414 ierr = PetscFree(larray);CHKERRQ(ierr); 2415 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2416 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2417 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2418 } else { 2419 SETERRQ1(1,"No support ISColoringType %d",coloring->ctype); 2420 } 2421 2422 PetscFunctionReturn(0); 2423 } 2424 2425 #undef __FUNCT__ 2426 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2427 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2428 { 2429 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2430 int ierr; 2431 2432 PetscFunctionBegin; 2433 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2434 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2435 PetscFunctionReturn(0); 2436 } 2437 2438 #undef __FUNCT__ 2439 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2440 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues) 2441 { 2442 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2443 int ierr; 2444 2445 PetscFunctionBegin; 2446 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2447 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2448 PetscFunctionReturn(0); 2449 } 2450
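
/*
   Illustrative usage sketch (not part of the library source): create a distributed
   AIJ matrix with MatCreateMPIAIJ(), assemble it, and then examine the two local
   SeqAIJ blocks with MatMPIAIJGetSeqAIJ().  The identifiers comm, m and rstart are
   assumed to be the communicator, the number of locally owned rows, and the first
   locally owned global row; error checking is abbreviated.

     Mat    A,Ad,Ao;
     int    *garray,i,ierr;
     Scalar one = 1.0;

     ierr = MatCreateMPIAIJ(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,
                            3,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
     for (i=rstart; i<rstart+m; i++) {
       ierr = MatSetValues(A,1,&i,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);

   Here Ad is the local DIAGONAL block and Ao the OFF-DIAGONAL block described in the
   man pages above; garray[j] gives the global column number of local column j of Ao.
*/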