1 /*$Id: mpiaij.c,v 1.337 2001/06/03 02:51:04 bsmith Exp buschelm $*/ 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/spops.h" 6 7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat); 8 EXTERN int DisAssemble_MPIAIJ(Mat); 9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 12 EXTERN int MatPrintHelp_SeqAIJ(Mat); 13 14 /* 15 Local utility routine that creates a mapping from the global column 16 number to the local number in the off-diagonal part of the local 17 storage of the matrix. When PETSC_USE_CTABLE is defined this is scalable, at 18 a slightly higher hash-table lookup cost; without it, it is not scalable (each processor 19 has an order-N integer array) but access is fast. The colmap maps a global column number back to its local position in garray (stored +1 so that 0 means the column is not present locally). 20 */ 21 #undef __FUNCT__ 22 #define __FUNCT__ "CreateColmap_MPIAIJ_Private" 23 int CreateColmap_MPIAIJ_Private(Mat mat) 24 { 25 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 26 int n = aij->B->n,i,ierr; 27 28 PetscFunctionBegin; 29 #if defined (PETSC_USE_CTABLE) 30 ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr); 31 for (i=0; i<n; i++){ 32 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 33 } 34 #else 35 ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr); 36 PetscLogObjectMemory(mat,mat->N*sizeof(int)); 37 ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr); 38 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 39 #endif 40 PetscFunctionReturn(0); 41 } 42 43 #define CHUNKSIZE 15 44 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 45 { \ 46 \ 47 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 48 rmax = aimax[row]; nrow = ailen[row]; \ 49 col1 = col - shift; \ 50 \ 51 low = 0; high = nrow; \ 52 while (high-low > 5) { \ 53 t = (low+high)/2; \ 54 if (rp[t] > col) high = t; \ 55 else low = t; \ 56 } \ 57 for (_i=low; _i<high; _i++) { \ 58 if (rp[_i] > col1) break; \ 59 if (rp[_i] == col1) { \ 60 if (addv == ADD_VALUES) ap[_i] += value; \ 61 else ap[_i] = value; \ 62 goto a_noinsert; \ 63 } \ 64 } \ 65 if (nonew == 1) goto a_noinsert; \ 66 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 67 if (nrow >= rmax) { \ 68 /* there is no extra room in row, therefore enlarge */ \ 69 int new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \ 70 Scalar *new_a; \ 71 \ 72 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 73 \ 74 /* malloc new storage space */ \ 75 len = new_nz*(sizeof(int)+sizeof(Scalar))+(am+1)*sizeof(int); \ 76 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 77 new_j = (int*)(new_a + new_nz); \ 78 new_i = new_j + new_nz; \ 79 \ 80 /* copy over old data into new slots */ \ 81 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \ 82 for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 83 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 84 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 85 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 86 len*sizeof(int));CHKERRQ(ierr); \ 87 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \ 88 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 89 len*sizeof(Scalar));CHKERRQ(ierr); \ 90 /* free up old matrix storage */ \ 91 \ 92 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 93 if (!a->singlemalloc) { \ 94 ierr = 
PetscFree(a->i);CHKERRQ(ierr); \ 95 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 96 } \ 97 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 98 a->singlemalloc = PETSC_TRUE; \ 99 \ 100 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 101 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 102 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 103 a->maxnz += CHUNKSIZE; \ 104 a->reallocs++; \ 105 } \ 106 N = nrow++ - 1; a->nz++; \ 107 /* shift up all the later entries in this row */ \ 108 for (ii=N; ii>=_i; ii--) { \ 109 rp[ii+1] = rp[ii]; \ 110 ap[ii+1] = ap[ii]; \ 111 } \ 112 rp[_i] = col1; \ 113 ap[_i] = value; \ 114 a_noinsert: ; \ 115 ailen[row] = nrow; \ 116 } 117 118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 119 { \ 120 \ 121 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 122 rmax = bimax[row]; nrow = bilen[row]; \ 123 col1 = col - shift; \ 124 \ 125 low = 0; high = nrow; \ 126 while (high-low > 5) { \ 127 t = (low+high)/2; \ 128 if (rp[t] > col) high = t; \ 129 else low = t; \ 130 } \ 131 for (_i=low; _i<high; _i++) { \ 132 if (rp[_i] > col1) break; \ 133 if (rp[_i] == col1) { \ 134 if (addv == ADD_VALUES) ap[_i] += value; \ 135 else ap[_i] = value; \ 136 goto b_noinsert; \ 137 } \ 138 } \ 139 if (nonew == 1) goto b_noinsert; \ 140 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 141 if (nrow >= rmax) { \ 142 /* there is no extra room in row, therefore enlarge */ \ 143 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 144 Scalar *new_a; \ 145 \ 146 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 147 \ 148 /* malloc new storage space */ \ 149 len = new_nz*(sizeof(int)+sizeof(Scalar))+(bm+1)*sizeof(int); \ 150 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 151 new_j = (int*)(new_a + new_nz); \ 152 new_i = new_j + new_nz; \ 153 \ 154 /* copy over old data into new slots */ \ 155 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 156 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 157 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 158 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 159 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 160 len*sizeof(int));CHKERRQ(ierr); \ 161 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \ 162 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 163 len*sizeof(Scalar));CHKERRQ(ierr); \ 164 /* free up old matrix storage */ \ 165 \ 166 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 167 if (!b->singlemalloc) { \ 168 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 169 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 170 } \ 171 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 172 b->singlemalloc = PETSC_TRUE; \ 173 \ 174 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 175 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 176 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 177 b->maxnz += CHUNKSIZE; \ 178 b->reallocs++; \ 179 } \ 180 N = nrow++ - 1; b->nz++; \ 181 /* shift up all the later entries in this row */ \ 182 for (ii=N; ii>=_i; ii--) { \ 183 rp[ii+1] = rp[ii]; \ 184 ap[ii+1] = ap[ii]; \ 185 } \ 186 rp[_i] = col1; \ 187 ap[_i] = value; \ 188 b_noinsert: ; \ 189 bilen[row] = nrow; \ 190 } 191 192 #undef __FUNCT__ 193 #define __FUNCT__ "MatSetValues_MPIAIJ" 194 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 195 { 196 
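/*
   Caller-side view of what this routine implements (an illustrative sketch only,
   not part of the original file): values are addressed by GLOBAL row/column
   indices; entries whose row is owned locally go into the diagonal block A or,
   after translating the column through colmap/garray, into the off-diagonal
   block B, while rows owned elsewhere are stashed and shipped at assembly time.

       Scalar v   = 1.0;
       int    row = 7, col = 12;        any global indices; 7 and 12 are made up
       ierr = MatSetValues(mat,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   Off-processor entries only become visible after the assembly calls above.
*/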
Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 197 Scalar value; 198 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 199 int cstart = aij->cstart,cend = aij->cend,row,col; 200 PetscTruth roworiented = aij->roworiented; 201 202 /* Some Variables required in the macro */ 203 Mat A = aij->A; 204 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 205 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 206 Scalar *aa = a->a; 207 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 208 Mat B = aij->B; 209 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 210 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 211 Scalar *ba = b->a; 212 213 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 214 int nonew = a->nonew,shift = a->indexshift; 215 Scalar *ap; 216 217 PetscFunctionBegin; 218 for (i=0; i<m; i++) { 219 if (im[i] < 0) continue; 220 #if defined(PETSC_USE_BOPT_g) 221 if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 222 #endif 223 if (im[i] >= rstart && im[i] < rend) { 224 row = im[i] - rstart; 225 for (j=0; j<n; j++) { 226 if (in[j] >= cstart && in[j] < cend){ 227 col = in[j] - cstart; 228 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 229 if (ignorezeroentries && value == 0.0) continue; 230 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 231 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 232 } else if (in[j] < 0) continue; 233 #if defined(PETSC_USE_BOPT_g) 234 else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");} 235 #endif 236 else { 237 if (mat->was_assembled) { 238 if (!aij->colmap) { 239 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 240 } 241 #if defined (PETSC_USE_CTABLE) 242 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 243 col--; 244 #else 245 col = aij->colmap[in[j]] - 1; 246 #endif 247 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 248 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 249 col = in[j]; 250 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 251 B = aij->B; 252 b = (Mat_SeqAIJ*)B->data; 253 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 254 ba = b->a; 255 } 256 } else col = in[j]; 257 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 258 if (ignorezeroentries && value == 0.0) continue; 259 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 260 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 261 } 262 } 263 } else { 264 if (!aij->donotstash) { 265 if (roworiented) { 266 if (ignorezeroentries && v[i*n] == 0.0) continue; 267 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 268 } else { 269 if (ignorezeroentries && v[i] == 0.0) continue; 270 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 271 } 272 } 273 } 274 } 275 PetscFunctionReturn(0); 276 } 277 278 #undef __FUNCT__ 279 #define __FUNCT__ "MatGetValues_MPIAIJ" 280 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 281 { 282 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 283 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 284 int cstart = aij->cstart,cend = aij->cend,row,col; 285 286 PetscFunctionBegin; 287 for (i=0; i<m; i++) { 288 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 289 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 290 if (idxm[i] >= rstart && idxm[i] < rend) { 291 row = idxm[i] - rstart; 292 for (j=0; j<n; j++) { 
293 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 294 if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 295 if (idxn[j] >= cstart && idxn[j] < cend){ 296 col = idxn[j] - cstart; 297 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 298 } else { 299 if (!aij->colmap) { 300 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 301 } 302 #if defined (PETSC_USE_CTABLE) 303 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 304 col --; 305 #else 306 col = aij->colmap[idxn[j]] - 1; 307 #endif 308 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 309 else { 310 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 311 } 312 } 313 } 314 } else { 315 SETERRQ(PETSC_ERR_SUP,"Only local values currently supported"); 316 } 317 } 318 PetscFunctionReturn(0); 319 } 320 321 #undef __FUNCT__ 322 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 323 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 324 { 325 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 326 int ierr,nstash,reallocs; 327 InsertMode addv; 328 329 PetscFunctionBegin; 330 if (aij->donotstash) { 331 PetscFunctionReturn(0); 332 } 333 334 /* make sure all processors are either in INSERTMODE or ADDMODE */ 335 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 336 if (addv == (ADD_VALUES|INSERT_VALUES)) { 337 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 338 } 339 mat->insertmode = addv; /* in case this processor had no cache */ 340 341 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 342 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 343 PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 344 PetscFunctionReturn(0); 345 } 346 347 348 #undef __FUNCT__ 349 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 350 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 351 { 352 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 353 int i,j,rstart,ncols,n,ierr,flg; 354 int *row,*col,other_disassembled; 355 Scalar *val; 356 InsertMode addv = mat->insertmode; 357 358 PetscFunctionBegin; 359 if (!aij->donotstash) { 360 while (1) { 361 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 362 if (!flg) break; 363 364 for (i=0; i<n;) { 365 /* Now identify the consecutive vals belonging to the same row */ 366 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 367 if (j < n) ncols = j-i; 368 else ncols = n-i; 369 /* Now assemble all these values with a single function call */ 370 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 371 i = j; 372 } 373 } 374 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 375 } 376 377 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 378 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 379 380 /* determine if any processor has disassembled, if so we must 381 also disassemble ourselfs, in order that we may reassemble. 
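   (How the check below works: was_assembled is 0 or 1 on each process, so the
   MPI_PROD reduction is 1 only if no process has disassembled its off-diagonal
   block. For example, with three processes and was_assembled = 1,0,1 the
   product is 0, and the two still-assembled processes call DisAssemble_MPIAIJ()
   so that everyone can accept new off-diagonal nonzeros before reassembling.)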
*/ 382 /* 383 if nonzero structure of submatrix B cannot change then we know that 384 no processor disassembled thus we can skip this stuff 385 */ 386 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 387 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 388 if (mat->was_assembled && !other_disassembled) { 389 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 390 } 391 } 392 393 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 394 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 395 } 396 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 397 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 398 399 if (aij->rowvalues) { 400 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 401 aij->rowvalues = 0; 402 } 403 PetscFunctionReturn(0); 404 } 405 406 #undef __FUNCT__ 407 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 408 int MatZeroEntries_MPIAIJ(Mat A) 409 { 410 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 411 int ierr; 412 413 PetscFunctionBegin; 414 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 415 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 416 PetscFunctionReturn(0); 417 } 418 419 #undef __FUNCT__ 420 #define __FUNCT__ "MatZeroRows_MPIAIJ" 421 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 422 { 423 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 424 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 425 int *procs,*nprocs,j,idx,nsends,*work,row; 426 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 427 int *rvalues,tag = A->tag,count,base,slen,n,*source; 428 int *lens,imdex,*lrows,*values,rstart=l->rstart; 429 MPI_Comm comm = A->comm; 430 MPI_Request *send_waits,*recv_waits; 431 MPI_Status recv_status,*send_status; 432 IS istmp; 433 PetscTruth found; 434 435 PetscFunctionBegin; 436 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 437 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 438 439 /* first count number of contributors to each processor */ 440 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 441 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 442 procs = nprocs + size; 443 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 444 for (i=0; i<N; i++) { 445 idx = rows[i]; 446 found = PETSC_FALSE; 447 for (j=0; j<size; j++) { 448 if (idx >= owners[j] && idx < owners[j+1]) { 449 nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break; 450 } 451 } 452 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 453 } 454 nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];} 455 456 /* inform other processors of number of messages and max length*/ 457 ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr); 458 ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 459 nrecvs = work[size+rank]; 460 nmax = work[rank]; 461 ierr = PetscFree(work);CHKERRQ(ierr); 462 463 /* post receives: */ 464 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 465 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 466 for (i=0; i<nrecvs; i++) { 467 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 468 } 469 470 /* do sends: 471 1) starts[i] gives the starting index in svalues for stuff going to 472 the ith processor 473 */ 474 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 475 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 476 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 477 starts[0] = 0; 478 for (i=1; i<size; 
i++) { starts[i] = starts[i-1] + nprocs[i-1];} 479 for (i=0; i<N; i++) { 480 svalues[starts[owner[i]]++] = rows[i]; 481 } 482 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 483 484 starts[0] = 0; 485 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 486 count = 0; 487 for (i=0; i<size; i++) { 488 if (procs[i]) { 489 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 490 } 491 } 492 ierr = PetscFree(starts);CHKERRQ(ierr); 493 494 base = owners[rank]; 495 496 /* wait on receives */ 497 ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr); 498 source = lens + nrecvs; 499 count = nrecvs; slen = 0; 500 while (count) { 501 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 502 /* unpack receives into our local space */ 503 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 504 source[imdex] = recv_status.MPI_SOURCE; 505 lens[imdex] = n; 506 slen += n; 507 count--; 508 } 509 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 510 511 /* move the data into the send scatter */ 512 ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr); 513 count = 0; 514 for (i=0; i<nrecvs; i++) { 515 values = rvalues + i*nmax; 516 for (j=0; j<lens[i]; j++) { 517 lrows[count++] = values[j] - base; 518 } 519 } 520 ierr = PetscFree(rvalues);CHKERRQ(ierr); 521 ierr = PetscFree(lens);CHKERRQ(ierr); 522 ierr = PetscFree(owner);CHKERRQ(ierr); 523 ierr = PetscFree(nprocs);CHKERRQ(ierr); 524 525 /* actually zap the local rows */ 526 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 527 PetscLogObjectParent(A,istmp); 528 529 /* 530 Zero the required rows. If the "diagonal block" of the matrix 531 is square and the user wishes to set the diagonal we use separate 532 code so that MatSetValues() is not called for each diagonal entry, which would allocate 533 new memory, trigger lots of mallocs, and slow things down. 
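   For reference, a caller-side sketch of the interface this routine backs
   (illustrative only, not taken from this file; assumes a parallel matrix A and
   global row numbers rows[0..nr-1], with error handling abbreviated):

       IS     is;
       Scalar one = 1.0;
       ierr = ISCreateGeneral(PETSC_COMM_WORLD,nr,rows,&is);CHKERRQ(ierr);
       ierr = MatZeroRows(A,is,&one);CHKERRQ(ierr);      keeps 1.0 on the diagonal
       ierr = ISDestroy(is);CHKERRQ(ierr);

   Passing PETSC_NULL instead of &one zeros the rows without touching the
   diagonal.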
534 535 Contributed by: Mathew Knepley 536 */ 537 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 538 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 539 if (diag && (l->A->M == l->A->N)) { 540 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 541 } else if (diag) { 542 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 543 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 544 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 545 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 546 } 547 for (i = 0; i < slen; i++) { 548 row = lrows[i] + rstart; 549 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 550 } 551 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 552 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 553 } else { 554 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 555 } 556 ierr = ISDestroy(istmp);CHKERRQ(ierr); 557 ierr = PetscFree(lrows);CHKERRQ(ierr); 558 559 /* wait on sends */ 560 if (nsends) { 561 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 562 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 563 ierr = PetscFree(send_status);CHKERRQ(ierr); 564 } 565 ierr = PetscFree(send_waits);CHKERRQ(ierr); 566 ierr = PetscFree(svalues);CHKERRQ(ierr); 567 568 PetscFunctionReturn(0); 569 } 570 571 #undef __FUNCT__ 572 #define __FUNCT__ "MatMult_MPIAIJ" 573 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 574 { 575 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 576 int ierr,nt; 577 578 PetscFunctionBegin; 579 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 580 if (nt != A->n) { 581 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt); 582 } 583 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 584 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 585 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 586 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 587 PetscFunctionReturn(0); 588 } 589 590 #undef __FUNCT__ 591 #define __FUNCT__ "MatMultAdd_MPIAIJ" 592 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 593 { 594 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 595 int ierr; 596 597 PetscFunctionBegin; 598 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 599 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 600 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 601 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 602 PetscFunctionReturn(0); 603 } 604 605 #undef __FUNCT__ 606 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 607 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 608 { 609 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 610 int ierr; 611 612 PetscFunctionBegin; 613 /* do nondiagonal part */ 614 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 615 /* send it on its way */ 616 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 617 /* do local part */ 618 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 619 /* receive remote parts: note this assumes the values are not actually */ 620 /* inserted in yy until the next line, which is true for my implementation*/ 621 /* but is not perhaps always true. 
*/ 622 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 623 PetscFunctionReturn(0); 624 } 625 626 #undef __FUNCT__ 627 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 628 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 629 { 630 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 631 int ierr; 632 633 PetscFunctionBegin; 634 /* do nondiagonal part */ 635 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 636 /* send it on its way */ 637 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 638 /* do local part */ 639 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 640 /* receive remote parts: note this assumes the values are not actually */ 641 /* inserted in yy until the next line, which is true for my implementation*/ 642 /* but is not perhaps always true. */ 643 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 644 PetscFunctionReturn(0); 645 } 646 647 /* 648 This only works correctly for square matrices where the subblock A->A is the 649 diagonal block 650 */ 651 #undef __FUNCT__ 652 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 653 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 654 { 655 int ierr; 656 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 657 658 PetscFunctionBegin; 659 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 660 if (a->rstart != a->cstart || a->rend != a->cend) { 661 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 662 } 663 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 664 PetscFunctionReturn(0); 665 } 666 667 #undef __FUNCT__ 668 #define __FUNCT__ "MatScale_MPIAIJ" 669 int MatScale_MPIAIJ(Scalar *aa,Mat A) 670 { 671 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 672 int ierr; 673 674 PetscFunctionBegin; 675 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 676 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 677 PetscFunctionReturn(0); 678 } 679 680 #undef __FUNCT__ 681 #define __FUNCT__ "MatDestroy_MPIAIJ" 682 int MatDestroy_MPIAIJ(Mat mat) 683 { 684 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 685 int ierr; 686 687 PetscFunctionBegin; 688 #if defined(PETSC_USE_LOG) 689 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 690 #endif 691 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 692 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 693 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 694 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 695 #if defined (PETSC_USE_CTABLE) 696 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 697 #else 698 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 699 #endif 700 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 701 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 702 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 703 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 704 ierr = PetscFree(aij);CHKERRQ(ierr); 705 PetscFunctionReturn(0); 706 } 707 708 #undef __FUNCT__ 709 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 710 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 711 { 712 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 713 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 714 int ierr,shift = C->indexshift,rank = aij->rank,size = aij->size; 715 PetscTruth isdraw,isascii,flg; 716 PetscViewer sviewer; 717 PetscViewerFormat format; 718 719 PetscFunctionBegin; 720 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 
721 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 722 if (isascii) { 723 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 724 if (format == PETSC_VIEWER_ASCII_INFO_LONG) { 725 MatInfo info; 726 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 727 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 728 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 729 if (flg) { 730 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 731 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 732 } else { 733 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 734 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 735 } 736 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 737 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 738 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 739 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 740 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 741 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 742 PetscFunctionReturn(0); 743 } else if (format == PETSC_VIEWER_ASCII_INFO) { 744 PetscFunctionReturn(0); 745 } 746 } else if (isdraw) { 747 PetscDraw draw; 748 PetscTruth isnull; 749 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 750 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 751 } 752 753 if (size == 1) { 754 ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr); 755 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 756 } else { 757 /* assemble the entire matrix onto first processor. 
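      What the code below does: every process calls MatCreateMPIAIJ() on the
      same communicator, but only rank 0 asks for all M rows and N columns (the
      others contribute empty local blocks), so the redistributed copy lives
      entirely on rank 0.  Each process then copies its diagonal part (columns
      offset by cstart) and its off-diagonal part (columns translated through
      garray) into that matrix with MatSetValues(), and after assembly only
      rank 0 views the resulting sequential block through the singleton viewer.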
*/ 758 Mat A; 759 Mat_SeqAIJ *Aloc; 760 int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 761 Scalar *a; 762 763 if (!rank) { 764 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 765 } else { 766 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 767 } 768 PetscLogObjectParent(mat,A); 769 770 /* copy over the A part */ 771 Aloc = (Mat_SeqAIJ*)aij->A->data; 772 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 773 row = aij->rstart; 774 for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;} 775 for (i=0; i<m; i++) { 776 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 777 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 778 } 779 aj = Aloc->j; 780 for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;} 781 782 /* copy over the B part */ 783 Aloc = (Mat_SeqAIJ*)aij->B->data; 784 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 785 row = aij->rstart; 786 ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr); 787 ct = cols; 788 for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];} 789 for (i=0; i<m; i++) { 790 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 791 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 792 } 793 ierr = PetscFree(ct);CHKERRQ(ierr); 794 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 795 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 796 /* 797 Everyone has to call to draw the matrix since the graphics waits are 798 synchronized across all processors that share the PetscDraw object 799 */ 800 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 801 if (!rank) { 802 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr); 803 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 804 } 805 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 806 ierr = MatDestroy(A);CHKERRQ(ierr); 807 } 808 PetscFunctionReturn(0); 809 } 810 811 #undef __FUNCT__ 812 #define __FUNCT__ "MatView_MPIAIJ" 813 int MatView_MPIAIJ(Mat mat,PetscViewer viewer) 814 { 815 int ierr; 816 PetscTruth isascii,isdraw,issocket,isbinary; 817 818 PetscFunctionBegin; 819 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 820 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 821 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 822 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 823 if (isascii || isdraw || isbinary || issocket) { 824 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 825 } else { 826 SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 827 } 828 PetscFunctionReturn(0); 829 } 830 831 /* 832 This has to provide several versions. 833 834 2) a) use only local smoothing updating outer values only once. 835 b) local smoothing updating outer values each inner iteration 836 3) color updating out values betwen colors. 
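   The SOR_LOCAL_* variants implemented below gather the ghost values once per
   call into lvec and then sweep only over the locally owned rows; assuming
   SPARSEDENSEMDOT(sum,x,v,idx,n) subtracts the sparse dot product v.x from sum
   (see src/inline/spops.h), each row update is the usual SOR step

       x_i <- (1-omega) x_i + omega (b_i - sum_{j != i} a_ij x_j) / (a_ii + fshift)

   with the off-processor column contributions taken from lvec.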
837 */ 838 #undef __FUNCT__ 839 #define __FUNCT__ "MatRelax_MPIAIJ" 840 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,Vec xx) 841 { 842 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 843 Mat AA = mat->A,BB = mat->B; 844 Mat_SeqAIJ *A = (Mat_SeqAIJ*)AA->data,*B = (Mat_SeqAIJ *)BB->data; 845 Scalar *b,*x,*xs,*ls,d,*v,sum; 846 int ierr,*idx,*diag; 847 int n = matin->n,m = matin->m,i,shift = A->indexshift; 848 849 PetscFunctionBegin; 850 if (!A->diag) {ierr = MatMarkDiagonal_SeqAIJ(AA);CHKERRQ(ierr);} 851 diag = A->diag; 852 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 853 if (flag & SOR_ZERO_INITIAL_GUESS) { 854 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 855 PetscFunctionReturn(0); 856 } 857 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 858 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 859 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 860 if (xx != bb) { 861 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 862 } else { 863 b = x; 864 } 865 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 866 xs = x + shift; /* shift by one for index start of 1 */ 867 ls = ls + shift; 868 while (its--) { 869 /* go down through the rows */ 870 for (i=0; i<m; i++) { 871 n = A->i[i+1] - A->i[i]; 872 PetscLogFlops(4*n+3); 873 idx = A->j + A->i[i] + shift; 874 v = A->a + A->i[i] + shift; 875 sum = b[i]; 876 SPARSEDENSEMDOT(sum,xs,v,idx,n); 877 d = fshift + A->a[diag[i]+shift]; 878 n = B->i[i+1] - B->i[i]; 879 idx = B->j + B->i[i] + shift; 880 v = B->a + B->i[i] + shift; 881 SPARSEDENSEMDOT(sum,ls,v,idx,n); 882 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 883 } 884 /* come up through the rows */ 885 for (i=m-1; i>-1; i--) { 886 n = A->i[i+1] - A->i[i]; 887 PetscLogFlops(4*n+3); 888 idx = A->j + A->i[i] + shift; 889 v = A->a + A->i[i] + shift; 890 sum = b[i]; 891 SPARSEDENSEMDOT(sum,xs,v,idx,n); 892 d = fshift + A->a[diag[i]+shift]; 893 n = B->i[i+1] - B->i[i]; 894 idx = B->j + B->i[i] + shift; 895 v = B->a + B->i[i] + shift; 896 SPARSEDENSEMDOT(sum,ls,v,idx,n); 897 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 898 } 899 } 900 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 901 if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); } 902 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 903 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 904 if (flag & SOR_ZERO_INITIAL_GUESS) { 905 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 906 PetscFunctionReturn(0); 907 } 908 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 909 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 910 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 911 if (xx != bb) { 912 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 913 } else { 914 b = x; 915 } 916 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 917 xs = x + shift; /* shift by one for index start of 1 */ 918 ls = ls + shift; 919 while (its--) { 920 for (i=0; i<m; i++) { 921 n = A->i[i+1] - A->i[i]; 922 PetscLogFlops(4*n+3); 923 idx = A->j + A->i[i] + shift; 924 v = A->a + A->i[i] + shift; 925 sum = b[i]; 926 SPARSEDENSEMDOT(sum,xs,v,idx,n); 927 d = fshift + A->a[diag[i]+shift]; 928 n = B->i[i+1] - B->i[i]; 929 idx = B->j + B->i[i] + shift; 930 v = B->a + B->i[i] + shift; 931 SPARSEDENSEMDOT(sum,ls,v,idx,n); 932 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 933 } 934 } 935 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 936 if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); } 937 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 938 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 939 if (flag & SOR_ZERO_INITIAL_GUESS) { 940 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 941 PetscFunctionReturn(0); 942 } 943 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 944 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 945 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 946 if (xx != bb) { 947 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 948 } else { 949 b = x; 950 } 951 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 952 xs = x + shift; /* shift by one for index start of 1 */ 953 ls = ls + shift; 954 while (its--) { 955 for (i=m-1; i>-1; i--) { 956 n = A->i[i+1] - A->i[i]; 957 PetscLogFlops(4*n+3); 958 idx = A->j + A->i[i] + shift; 959 v = A->a + A->i[i] + shift; 960 sum = b[i]; 961 SPARSEDENSEMDOT(sum,xs,v,idx,n); 962 d = fshift + A->a[diag[i]+shift]; 963 n = B->i[i+1] - B->i[i]; 964 idx = B->j + B->i[i] + shift; 965 v = B->a + B->i[i] + shift; 966 SPARSEDENSEMDOT(sum,ls,v,idx,n); 967 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 968 } 969 } 970 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 971 if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); } 972 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 973 } else { 974 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 975 } 976 PetscFunctionReturn(0); 977 } 978 979 #undef __FUNCT__ 980 #define __FUNCT__ "MatGetInfo_MPIAIJ" 981 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 982 { 983 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 984 Mat A = mat->A,B = mat->B; 985 int ierr; 986 PetscReal isend[5],irecv[5]; 987 988 PetscFunctionBegin; 989 info->block_size = 1.0; 990 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 991 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 992 isend[3] = info->memory; isend[4] = info->mallocs; 993 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 994 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 995 isend[3] += info->memory; isend[4] += info->mallocs; 996 if (flag == MAT_LOCAL) { 997 info->nz_used = isend[0]; 998 info->nz_allocated = isend[1]; 999 info->nz_unneeded = isend[2]; 1000 info->memory = isend[3]; 1001 info->mallocs = isend[4]; 1002 } else if (flag == MAT_GLOBAL_MAX) { 1003 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1004 info->nz_used = irecv[0]; 1005 info->nz_allocated = irecv[1]; 1006 info->nz_unneeded = irecv[2]; 1007 info->memory = irecv[3]; 1008 info->mallocs = irecv[4]; 1009 } else if (flag == MAT_GLOBAL_SUM) { 1010 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1011 info->nz_used = irecv[0]; 1012 info->nz_allocated = irecv[1]; 1013 info->nz_unneeded = irecv[2]; 1014 info->memory = irecv[3]; 1015 info->mallocs = irecv[4]; 1016 } 1017 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1018 info->fill_ratio_needed = 0; 1019 info->factor_mallocs = 0; 1020 info->rows_global = (double)matin->M; 1021 info->columns_global = (double)matin->N; 1022 info->rows_local = (double)matin->m; 1023 info->columns_local = (double)matin->N; 1024 1025 PetscFunctionReturn(0); 1026 } 1027 1028 #undef 
__FUNCT__ 1029 #define __FUNCT__ "MatSetOption_MPIAIJ" 1030 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1031 { 1032 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1033 int ierr; 1034 1035 PetscFunctionBegin; 1036 switch (op) { 1037 case MAT_NO_NEW_NONZERO_LOCATIONS: 1038 case MAT_YES_NEW_NONZERO_LOCATIONS: 1039 case MAT_COLUMNS_UNSORTED: 1040 case MAT_COLUMNS_SORTED: 1041 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1042 case MAT_KEEP_ZEROED_ROWS: 1043 case MAT_NEW_NONZERO_LOCATION_ERR: 1044 case MAT_USE_INODES: 1045 case MAT_DO_NOT_USE_INODES: 1046 case MAT_IGNORE_ZERO_ENTRIES: 1047 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1048 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1049 break; 1050 case MAT_ROW_ORIENTED: 1051 a->roworiented = PETSC_TRUE; 1052 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1053 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1054 break; 1055 case MAT_ROWS_SORTED: 1056 case MAT_ROWS_UNSORTED: 1057 case MAT_YES_NEW_DIAGONALS: 1058 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1059 break; 1060 case MAT_COLUMN_ORIENTED: 1061 a->roworiented = PETSC_FALSE; 1062 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1063 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1064 break; 1065 case MAT_IGNORE_OFF_PROC_ENTRIES: 1066 a->donotstash = PETSC_TRUE; 1067 break; 1068 case MAT_NO_NEW_DIAGONALS: 1069 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1070 break; 1071 default: 1072 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1073 break; 1074 } 1075 PetscFunctionReturn(0); 1076 } 1077 1078 #undef __FUNCT__ 1079 #define __FUNCT__ "MatGetOwnershipRange_MPIAIJ" 1080 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1081 { 1082 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1083 1084 PetscFunctionBegin; 1085 if (m) *m = mat->rstart; 1086 if (n) *n = mat->rend; 1087 PetscFunctionReturn(0); 1088 } 1089 1090 #undef __FUNCT__ 1091 #define __FUNCT__ "MatGetRow_MPIAIJ" 1092 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1093 { 1094 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1095 Scalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1096 int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1097 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1098 int *cmap,*idx_p; 1099 1100 PetscFunctionBegin; 1101 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1102 mat->getrowactive = PETSC_TRUE; 1103 1104 if (!mat->rowvalues && (idx || v)) { 1105 /* 1106 allocate enough space to hold information from the longest row. 
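         That is, the workspace must hold, for a single row, the column indices
         and values from both the diagonal block A and the off-diagonal block B:
         the loop below computes the maximum over local rows of
         (Aa->i[i+1]-Aa->i[i]) + (Ba->i[i+1]-Ba->i[i]), and one malloc of
         max*(sizeof(int)+sizeof(Scalar)) then serves both rowindices and
         rowvalues.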
1107 */ 1108 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1109 int max = 1,tmp; 1110 for (i=0; i<matin->m; i++) { 1111 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1112 if (max < tmp) { max = tmp; } 1113 } 1114 ierr = PetscMalloc(max*(sizeof(int)+sizeof(Scalar)),&mat->rowvalues);CHKERRQ(ierr); 1115 mat->rowindices = (int*)(mat->rowvalues + max); 1116 } 1117 1118 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1119 lrow = row - rstart; 1120 1121 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1122 if (!v) {pvA = 0; pvB = 0;} 1123 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1124 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1125 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1126 nztot = nzA + nzB; 1127 1128 cmap = mat->garray; 1129 if (v || idx) { 1130 if (nztot) { 1131 /* Sort by increasing column numbers, assuming A and B already sorted */ 1132 int imark = -1; 1133 if (v) { 1134 *v = v_p = mat->rowvalues; 1135 for (i=0; i<nzB; i++) { 1136 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1137 else break; 1138 } 1139 imark = i; 1140 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1141 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1142 } 1143 if (idx) { 1144 *idx = idx_p = mat->rowindices; 1145 if (imark > -1) { 1146 for (i=0; i<imark; i++) { 1147 idx_p[i] = cmap[cworkB[i]]; 1148 } 1149 } else { 1150 for (i=0; i<nzB; i++) { 1151 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1152 else break; 1153 } 1154 imark = i; 1155 } 1156 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1157 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1158 } 1159 } else { 1160 if (idx) *idx = 0; 1161 if (v) *v = 0; 1162 } 1163 } 1164 *nz = nztot; 1165 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1166 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1167 PetscFunctionReturn(0); 1168 } 1169 1170 #undef __FUNCT__ 1171 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1172 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1173 { 1174 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1175 1176 PetscFunctionBegin; 1177 if (aij->getrowactive == PETSC_FALSE) { 1178 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1179 } 1180 aij->getrowactive = PETSC_FALSE; 1181 PetscFunctionReturn(0); 1182 } 1183 1184 #undef __FUNCT__ 1185 #define __FUNCT__ "MatNorm_MPIAIJ" 1186 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1187 { 1188 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1189 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1190 int ierr,i,j,cstart = aij->cstart,shift = amat->indexshift; 1191 PetscReal sum = 0.0; 1192 Scalar *v; 1193 1194 PetscFunctionBegin; 1195 if (aij->size == 1) { 1196 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1197 } else { 1198 if (type == NORM_FROBENIUS) { 1199 v = amat->a; 1200 for (i=0; i<amat->nz; i++) { 1201 #if defined(PETSC_USE_COMPLEX) 1202 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1203 #else 1204 sum += (*v)*(*v); v++; 1205 #endif 1206 } 1207 v = bmat->a; 1208 for (i=0; i<bmat->nz; i++) { 1209 #if defined(PETSC_USE_COMPLEX) 1210 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1211 #else 1212 sum += (*v)*(*v); v++; 1213 #endif 1214 } 1215 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1216 *norm = sqrt(*norm); 1217 } else if (type == NORM_1) { /* max column norm */ 1218 PetscReal *tmp,*tmp2; 
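      /*
         What this branch computes: the 1-norm is the maximum column sum,
         ||A||_1 = max_j sum_i |a_ij|.  Each process adds |a_ij| for its local
         nonzeros into a length-N array indexed by global column (cstart offset
         for the diagonal block, garray[] for the off-diagonal block), the
         arrays are summed across processes with MPI_Allreduce, and the largest
         entry is taken.  The NORM_INFINITY branch below is the analogous
         maximum row sum, which needs only a local maximum and an MPI_MAX reduce.
      */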
1219 int *jj,*garray = aij->garray; 1220 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1221 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1222 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1223 *norm = 0.0; 1224 v = amat->a; jj = amat->j; 1225 for (j=0; j<amat->nz; j++) { 1226 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1227 } 1228 v = bmat->a; jj = bmat->j; 1229 for (j=0; j<bmat->nz; j++) { 1230 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1231 } 1232 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1233 for (j=0; j<mat->N; j++) { 1234 if (tmp2[j] > *norm) *norm = tmp2[j]; 1235 } 1236 ierr = PetscFree(tmp);CHKERRQ(ierr); 1237 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1238 } else if (type == NORM_INFINITY) { /* max row norm */ 1239 PetscReal ntemp = 0.0; 1240 for (j=0; j<aij->A->m; j++) { 1241 v = amat->a + amat->i[j] + shift; 1242 sum = 0.0; 1243 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1244 sum += PetscAbsScalar(*v); v++; 1245 } 1246 v = bmat->a + bmat->i[j] + shift; 1247 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1248 sum += PetscAbsScalar(*v); v++; 1249 } 1250 if (sum > ntemp) ntemp = sum; 1251 } 1252 ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr); 1253 } else { 1254 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1255 } 1256 } 1257 PetscFunctionReturn(0); 1258 } 1259 1260 #undef __FUNCT__ 1261 #define __FUNCT__ "MatTranspose_MPIAIJ" 1262 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1263 { 1264 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1265 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1266 int ierr,shift = Aloc->indexshift; 1267 int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1268 Mat B; 1269 Scalar *array; 1270 1271 PetscFunctionBegin; 1272 if (!matout && M != N) { 1273 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1274 } 1275 1276 ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1277 1278 /* copy over the A part */ 1279 Aloc = (Mat_SeqAIJ*)a->A->data; 1280 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1281 row = a->rstart; 1282 for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;} 1283 for (i=0; i<m; i++) { 1284 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1285 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1286 } 1287 aj = Aloc->j; 1288 for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;} 1289 1290 /* copy over the B part */ 1291 Aloc = (Mat_SeqAIJ*)a->B->data; 1292 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1293 row = a->rstart; 1294 ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr); 1295 ct = cols; 1296 for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];} 1297 for (i=0; i<m; i++) { 1298 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1299 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1300 } 1301 ierr = PetscFree(ct);CHKERRQ(ierr); 1302 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1303 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1304 if (matout) { 1305 *matout = B; 1306 } else { 1307 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1308 } 1309 PetscFunctionReturn(0); 1310 } 1311 1312 #undef __FUNCT__ 1313 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1314 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1315 { 1316 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1317 Mat a = aij->A,b = aij->B; 1318 int 
ierr,s1,s2,s3; 1319 1320 PetscFunctionBegin; 1321 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1322 if (rr) { 1323 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1324 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1325 /* Overlap communication with computation. */ 1326 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1327 } 1328 if (ll) { 1329 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1330 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1331 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1332 } 1333 /* scale the diagonal block */ 1334 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1335 1336 if (rr) { 1337 /* Do a scatter end and then right scale the off-diagonal block */ 1338 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1339 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1340 } 1341 1342 PetscFunctionReturn(0); 1343 } 1344 1345 1346 #undef __FUNCT__ 1347 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1348 int MatPrintHelp_MPIAIJ(Mat A) 1349 { 1350 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1351 int ierr; 1352 1353 PetscFunctionBegin; 1354 if (!a->rank) { 1355 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1356 } 1357 PetscFunctionReturn(0); 1358 } 1359 1360 #undef __FUNCT__ 1361 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1362 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1363 { 1364 PetscFunctionBegin; 1365 *bs = 1; 1366 PetscFunctionReturn(0); 1367 } 1368 #undef __FUNCT__ 1369 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1370 int MatSetUnfactored_MPIAIJ(Mat A) 1371 { 1372 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1373 int ierr; 1374 1375 PetscFunctionBegin; 1376 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1377 PetscFunctionReturn(0); 1378 } 1379 1380 #undef __FUNCT__ 1381 #define __FUNCT__ "MatEqual_MPIAIJ" 1382 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1383 { 1384 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1385 Mat a,b,c,d; 1386 PetscTruth flg; 1387 int ierr; 1388 1389 PetscFunctionBegin; 1390 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1391 if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type"); 1392 a = matA->A; b = matA->B; 1393 c = matB->A; d = matB->B; 1394 1395 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1396 if (flg == PETSC_TRUE) { 1397 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1398 } 1399 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1400 PetscFunctionReturn(0); 1401 } 1402 1403 #undef __FUNCT__ 1404 #define __FUNCT__ "MatCopy_MPIAIJ" 1405 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1406 { 1407 int ierr; 1408 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1409 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1410 PetscTruth flg; 1411 1412 PetscFunctionBegin; 1413 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1414 if (str != SAME_NONZERO_PATTERN || !flg) { 1415 /* because of the column compression in the off-processor part of the matrix a->B, 1416 the number of columns in a->B and b->B may be different, hence we cannot call 1417 the MatCopy() directly on the two parts. 
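      (The compression in question: B stores only those global columns that
      actually contain a nonzero on this process, renumbered 0..B->n-1 through
      garray[], so two matrices with identical global nonzero patterns can still
      have differently sized local B blocks.)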
If need be, we can provide a more 1418 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1419 then copying the submatrices */ 1420 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1421 } else { 1422 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1423 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1424 } 1425 PetscFunctionReturn(0); 1426 } 1427 1428 #undef __FUNCT__ 1429 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1430 int MatSetUpPreallocation_MPIAIJ(Mat A) 1431 { 1432 int ierr; 1433 1434 PetscFunctionBegin; 1435 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1436 PetscFunctionReturn(0); 1437 } 1438 1439 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1440 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int); 1441 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1442 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **); 1443 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *); 1444 #if !defined(PETSC_USE_COMPLEX) 1445 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*); 1446 #endif 1447 1448 /* -------------------------------------------------------------------*/ 1449 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1450 MatGetRow_MPIAIJ, 1451 MatRestoreRow_MPIAIJ, 1452 MatMult_MPIAIJ, 1453 MatMultAdd_MPIAIJ, 1454 MatMultTranspose_MPIAIJ, 1455 MatMultTransposeAdd_MPIAIJ, 1456 0, 1457 0, 1458 0, 1459 0, 1460 0, 1461 0, 1462 MatRelax_MPIAIJ, 1463 MatTranspose_MPIAIJ, 1464 MatGetInfo_MPIAIJ, 1465 MatEqual_MPIAIJ, 1466 MatGetDiagonal_MPIAIJ, 1467 MatDiagonalScale_MPIAIJ, 1468 MatNorm_MPIAIJ, 1469 MatAssemblyBegin_MPIAIJ, 1470 MatAssemblyEnd_MPIAIJ, 1471 0, 1472 MatSetOption_MPIAIJ, 1473 MatZeroEntries_MPIAIJ, 1474 MatZeroRows_MPIAIJ, 1475 #if !defined(PETSC_USE_COMPLEX) 1476 MatLUFactorSymbolic_MPIAIJ_TFS, 1477 #else 1478 0, 1479 #endif 1480 0, 1481 0, 1482 0, 1483 MatSetUpPreallocation_MPIAIJ, 1484 0, 1485 MatGetOwnershipRange_MPIAIJ, 1486 0, 1487 0, 1488 0, 1489 0, 1490 MatDuplicate_MPIAIJ, 1491 0, 1492 0, 1493 0, 1494 0, 1495 0, 1496 MatGetSubMatrices_MPIAIJ, 1497 MatIncreaseOverlap_MPIAIJ, 1498 MatGetValues_MPIAIJ, 1499 MatCopy_MPIAIJ, 1500 MatPrintHelp_MPIAIJ, 1501 MatScale_MPIAIJ, 1502 0, 1503 0, 1504 0, 1505 MatGetBlockSize_MPIAIJ, 1506 0, 1507 0, 1508 0, 1509 0, 1510 MatFDColoringCreate_MPIAIJ, 1511 0, 1512 MatSetUnfactored_MPIAIJ, 1513 0, 1514 0, 1515 MatGetSubMatrix_MPIAIJ, 1516 MatDestroy_MPIAIJ, 1517 MatView_MPIAIJ, 1518 MatGetMaps_Petsc, 1519 0, 1520 0, 1521 0, 1522 0, 1523 0, 1524 0, 1525 0, 1526 0, 1527 MatSetColoring_MPIAIJ, 1528 MatSetValuesAdic_MPIAIJ, 1529 MatSetValuesAdifor_MPIAIJ 1530 }; 1531 1532 /* ----------------------------------------------------------------------------------------*/ 1533 1534 EXTERN_C_BEGIN 1535 #undef __FUNCT__ 1536 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1537 int MatStoreValues_MPIAIJ(Mat mat) 1538 { 1539 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1540 int ierr; 1541 1542 PetscFunctionBegin; 1543 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1544 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1545 PetscFunctionReturn(0); 1546 } 1547 EXTERN_C_END 1548 1549 EXTERN_C_BEGIN 1550 #undef __FUNCT__ 1551 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1552 int MatRetrieveValues_MPIAIJ(Mat mat) 1553 { 1554 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1555 int ierr; 1556 1557 PetscFunctionBegin; 1558 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1559 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1560 
PetscFunctionReturn(0); 1561 } 1562 EXTERN_C_END 1563 1564 #include "petscpc.h" 1565 EXTERN_C_BEGIN 1566 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *); 1567 EXTERN_C_END 1568 1569 EXTERN_C_BEGIN 1570 #undef __FUNCT__ 1571 #define __FUNCT__ "MatCreate_MPIAIJ" 1572 int MatCreate_MPIAIJ(Mat B) 1573 { 1574 Mat_MPIAIJ *b; 1575 int ierr,i,size; 1576 1577 PetscFunctionBegin; 1578 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1579 1580 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1581 B->data = (void*)b; 1582 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1583 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1584 B->factor = 0; 1585 B->assembled = PETSC_FALSE; 1586 B->mapping = 0; 1587 1588 B->insertmode = NOT_SET_VALUES; 1589 b->size = size; 1590 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1591 1592 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1593 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1594 1595 /* the information in the maps duplicates the information computed below, eventually 1596 we should remove the duplicate information that is not contained in the maps */ 1597 ierr = MapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1598 ierr = MapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1599 1600 /* build local table of row and column ownerships */ 1601 ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1602 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1603 b->cowners = b->rowners + b->size + 2; 1604 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1605 b->rowners[0] = 0; 1606 for (i=2; i<=b->size; i++) { 1607 b->rowners[i] += b->rowners[i-1]; 1608 } 1609 b->rstart = b->rowners[b->rank]; 1610 b->rend = b->rowners[b->rank+1]; 1611 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1612 b->cowners[0] = 0; 1613 for (i=2; i<=b->size; i++) { 1614 b->cowners[i] += b->cowners[i-1]; 1615 } 1616 b->cstart = b->cowners[b->rank]; 1617 b->cend = b->cowners[b->rank+1]; 1618 1619 /* build cache for off array entries formed */ 1620 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1621 b->donotstash = PETSC_FALSE; 1622 b->colmap = 0; 1623 b->garray = 0; 1624 b->roworiented = PETSC_TRUE; 1625 1626 /* stuff used for matrix vector multiply */ 1627 b->lvec = PETSC_NULL; 1628 b->Mvctx = PETSC_NULL; 1629 1630 /* stuff for MatGetRow() */ 1631 b->rowindices = 0; 1632 b->rowvalues = 0; 1633 b->getrowactive = PETSC_FALSE; 1634 1635 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1636 "MatStoreValues_MPIAIJ", 1637 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1638 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1639 "MatRetrieveValues_MPIAIJ", 1640 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1641 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1642 "MatGetDiagonalBlock_MPIAIJ", 1643 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1644 PetscFunctionReturn(0); 1645 } 1646 EXTERN_C_END 1647 1648 #undef __FUNCT__ 1649 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1650 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1651 { 1652 Mat mat; 1653 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1654 int ierr; 1655 1656 PetscFunctionBegin; 1657 *newmat = 0; 1658 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1659 
ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr); 1660 a = (Mat_MPIAIJ*)mat->data; 1661 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1662 mat->factor = matin->factor; 1663 mat->assembled = PETSC_TRUE; 1664 mat->insertmode = NOT_SET_VALUES; 1665 mat->preallocated = PETSC_TRUE; 1666 1667 a->rstart = oldmat->rstart; 1668 a->rend = oldmat->rend; 1669 a->cstart = oldmat->cstart; 1670 a->cend = oldmat->cend; 1671 a->size = oldmat->size; 1672 a->rank = oldmat->rank; 1673 a->donotstash = oldmat->donotstash; 1674 a->roworiented = oldmat->roworiented; 1675 a->rowindices = 0; 1676 a->rowvalues = 0; 1677 a->getrowactive = PETSC_FALSE; 1678 1679 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1680 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1681 if (oldmat->colmap) { 1682 #if defined (PETSC_USE_CTABLE) 1683 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1684 #else 1685 ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr); 1686 PetscLogObjectMemory(mat,(mat->N)*sizeof(int)); 1687 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr); 1688 #endif 1689 } else a->colmap = 0; 1690 if (oldmat->garray) { 1691 int len; 1692 len = oldmat->B->n; 1693 ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr); 1694 PetscLogObjectMemory(mat,len*sizeof(int)); 1695 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1696 } else a->garray = 0; 1697 1698 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1699 PetscLogObjectParent(mat,a->lvec); 1700 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1701 PetscLogObjectParent(mat,a->Mvctx); 1702 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1703 PetscLogObjectParent(mat,a->A); 1704 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1705 PetscLogObjectParent(mat,a->B); 1706 ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1707 *newmat = mat; 1708 PetscFunctionReturn(0); 1709 } 1710 1711 #include "petscsys.h" 1712 1713 EXTERN_C_BEGIN 1714 #undef __FUNCT__ 1715 #define __FUNCT__ "MatLoad_MPIAIJ" 1716 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat) 1717 { 1718 Mat A; 1719 Scalar *vals,*svals; 1720 MPI_Comm comm = ((PetscObject)viewer)->comm; 1721 MPI_Status status; 1722 int i,nz,ierr,j,rstart,rend,fd; 1723 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1724 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1725 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1726 1727 PetscFunctionBegin; 1728 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1729 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1730 if (!rank) { 1731 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1732 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1733 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 1734 if (header[3] < 0) { 1735 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ"); 1736 } 1737 } 1738 1739 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1740 M = header[1]; N = header[2]; 1741 /* determine ownership of all rows */ 1742 m = M/size + ((M % size) > rank); 1743 ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr); 1744 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1745 rowners[0] = 0; 1746 for (i=2; i<=size; 
i++) { 1747 rowners[i] += rowners[i-1]; 1748 } 1749 rstart = rowners[rank]; 1750 rend = rowners[rank+1]; 1751 1752 /* distribute row lengths to all processors */ 1753 ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr); 1754 offlens = ourlens + (rend-rstart); 1755 if (!rank) { 1756 ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr); 1757 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1758 ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr); 1759 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 1760 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1761 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1762 } else { 1763 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1764 } 1765 1766 if (!rank) { 1767 /* calculate the number of nonzeros on each processor */ 1768 ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr); 1769 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 1770 for (i=0; i<size; i++) { 1771 for (j=rowners[i]; j< rowners[i+1]; j++) { 1772 procsnz[i] += rowlengths[j]; 1773 } 1774 } 1775 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1776 1777 /* determine max buffer needed and allocate it */ 1778 maxnz = 0; 1779 for (i=0; i<size; i++) { 1780 maxnz = PetscMax(maxnz,procsnz[i]); 1781 } 1782 ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr); 1783 1784 /* read in my part of the matrix column indices */ 1785 nz = procsnz[0]; 1786 ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr); 1787 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1788 1789 /* read in every one elses and ship off */ 1790 for (i=1; i<size; i++) { 1791 nz = procsnz[i]; 1792 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1793 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 1794 } 1795 ierr = PetscFree(cols);CHKERRQ(ierr); 1796 } else { 1797 /* determine buffer space needed for message */ 1798 nz = 0; 1799 for (i=0; i<m; i++) { 1800 nz += ourlens[i]; 1801 } 1802 ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr); 1803 1804 /* receive message of column indices*/ 1805 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 1806 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 1807 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 1808 } 1809 1810 /* determine column ownership if matrix is not square */ 1811 if (N != M) { 1812 n = N/size + ((N % size) > rank); 1813 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1814 cstart = cend - n; 1815 } else { 1816 cstart = rstart; 1817 cend = rend; 1818 n = cend - cstart; 1819 } 1820 1821 /* loop over local rows, determining number of off diagonal entries */ 1822 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 1823 jj = 0; 1824 for (i=0; i<m; i++) { 1825 for (j=0; j<ourlens[i]; j++) { 1826 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 1827 jj++; 1828 } 1829 } 1830 1831 /* create our matrix */ 1832 for (i=0; i<m; i++) { 1833 ourlens[i] -= offlens[i]; 1834 } 1835 ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1836 A = *newmat; 1837 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 1838 for (i=0; i<m; i++) { 1839 ourlens[i] += offlens[i]; 1840 } 1841 1842 if (!rank) { 1843 ierr = PetscMalloc(maxnz*sizeof(Scalar),&vals);CHKERRQ(ierr); 1844 1845 /* read in my part of the matrix numerical values */ 1846 nz = 
procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the values owned by the other processors and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(Scalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ matrix
    in local numbering, and then, by concatenating the local matrices, the end result.
1903 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 1904 */ 1905 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 1906 { 1907 int ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j; 1908 int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend; 1909 Mat *local,M,Mreuse; 1910 Scalar *vwork,*aa; 1911 MPI_Comm comm = mat->comm; 1912 Mat_SeqAIJ *aij; 1913 1914 1915 PetscFunctionBegin; 1916 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1917 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1918 1919 if (call == MAT_REUSE_MATRIX) { 1920 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 1921 if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse"); 1922 local = &Mreuse; 1923 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 1924 } else { 1925 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 1926 Mreuse = *local; 1927 ierr = PetscFree(local);CHKERRQ(ierr); 1928 } 1929 1930 /* 1931 m - number of local rows 1932 n - number of columns (same on all processors) 1933 rstart - first row in new global matrix generated 1934 */ 1935 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 1936 if (call == MAT_INITIAL_MATRIX) { 1937 aij = (Mat_SeqAIJ*)(Mreuse)->data; 1938 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix"); 1939 ii = aij->i; 1940 jj = aij->j; 1941 1942 /* 1943 Determine the number of non-zeros in the diagonal and off-diagonal 1944 portions of the matrix in order to do correct preallocation 1945 */ 1946 1947 /* first get start and end of "diagonal" columns */ 1948 if (csize == PETSC_DECIDE) { 1949 nlocal = n/size + ((n % size) > rank); 1950 } else { 1951 nlocal = csize; 1952 } 1953 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1954 rstart = rend - nlocal; 1955 if (rank == size - 1 && rend != n) { 1956 SETERRQ(1,"Local column sizes do not add up to total number of columns"); 1957 } 1958 1959 /* next, compute all the lengths */ 1960 ierr = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr); 1961 olens = dlens + m; 1962 for (i=0; i<m; i++) { 1963 jend = ii[i+1] - ii[i]; 1964 olen = 0; 1965 dlen = 0; 1966 for (j=0; j<jend; j++) { 1967 if (*jj < rstart || *jj >= rend) olen++; 1968 else dlen++; 1969 jj++; 1970 } 1971 olens[i] = olen; 1972 dlens[i] = dlen; 1973 } 1974 ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 1975 ierr = PetscFree(dlens);CHKERRQ(ierr); 1976 } else { 1977 int ml,nl; 1978 1979 M = *newmat; 1980 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 1981 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 1982 ierr = MatZeroEntries(M);CHKERRQ(ierr); 1983 /* 1984 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 1985 rather than the slower MatSetValues(). 
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij = (Mat_SeqAIJ*)(Mreuse)->data;
  if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
  ii = aij->i;
  jj = aij->j;
  aa = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save the submatrix used on this processor for the next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ
   format (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  d_nz  - number of nonzeros per row in the DIAGONAL portion of the local submatrix
           (the same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of the local
           submatrix (the same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure.  The size of this array is equal to the number
           of local rows, i.e., 'm'.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor.  This is a square mxm matrix.
   The remaining portion
   of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors.  Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows.  This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
   Proc0    0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
   -------------------------------------
           13  0 14  | 15 16 17  |  0  0
   Proc1    0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
   -------------------------------------
   Proc2   25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0, proc1, proc2 are 3, 3, 2 respectively.
   The 'n' parameters for proc0, proc1, proc2 are 3, 3, 2 respectively.
   The 'M','N' parameters are 8, 8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0, proc1, proc2 are
   the submatrices [A], [E], [I] respectively.  The OFF-DIAGONAL submatrices
   corresponding to proc0, proc1, proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz, o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc.  This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz, o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e., 34, and
   hence the preallocation is perfect.
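
   As an illustrative sketch only (the array names d_nnz and o_nnz below are
   local variables chosen for this example, not part of the interface), proc1
   of the example above could supply its per-row counts as follows:

.vb
     int d_nnz[] = {3,3,2};   /* rows of the DIAGONAL block [E]      */
     int o_nnz[] = {2,1,1};   /* rows of the OFF-DIAGONAL block [DF] */
     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
   Since d_nnz and o_nnz are provided, the scalar d_nz and o_nz arguments
   (passed here as 0) are ignored, as noted above.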
2145 2146 Level: intermediate 2147 2148 .keywords: matrix, aij, compressed row, sparse, parallel 2149 2150 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2151 @*/ 2152 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz) 2153 { 2154 Mat_MPIAIJ *b; 2155 int ierr,i; 2156 PetscTruth flg2; 2157 2158 PetscFunctionBegin; 2159 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr); 2160 if (!flg2) PetscFunctionReturn(0); 2161 B->preallocated = PETSC_TRUE; 2162 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 2163 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 2164 if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz); 2165 if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz); 2166 if (d_nnz) { 2167 for (i=0; i<B->m; i++) { 2168 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 2169 } 2170 } 2171 if (o_nnz) { 2172 for (i=0; i<B->m; i++) { 2173 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 2174 } 2175 } 2176 b = (Mat_MPIAIJ*)B->data; 2177 2178 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2179 PetscLogObjectParent(B,b->A); 2180 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2181 PetscLogObjectParent(B,b->B); 2182 2183 PetscFunctionReturn(0); 2184 } 2185 2186 #undef __FUNCT__ 2187 #define __FUNCT__ "MatCreateMPIAIJ" 2188 /*@C 2189 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 2190 (the default parallel PETSc format). For good matrix assembly performance 2191 the user should preallocate the matrix storage by setting the parameters 2192 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2193 performance can be increased by more than a factor of 50. 2194 2195 Collective on MPI_Comm 2196 2197 Input Parameters: 2198 + comm - MPI communicator 2199 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2200 This value should be the same as the local size used in creating the 2201 y vector for the matrix-vector product y = Ax. 2202 . n - This value should be the same as the local size used in creating the 2203 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 2204 calculated if N is given) For square matrices n is almost always m. 2205 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2206 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2207 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 2208 (same value is used for all local rows) 2209 . d_nnz - array containing the number of nonzeros in the various rows of the 2210 DIAGONAL portion of the local submatrix (possibly different for each row) 2211 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 2212 The size of this array is equal to the number of local rows, i.e 'm'. 2213 You must leave room for the diagonal entry even if it is zero. 2214 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 2215 submatrix (same value is used for all local rows). 2216 - o_nnz - array containing the number of nonzeros in the various rows of the 2217 OFF-DIAGONAL portion of the local submatrix (possibly different for 2218 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 2219 structure. 
The size of this array is equal to the number
           of local rows, i.e., 'm'.

   Output Parameter:
.  A - the matrix

   Notes:
   The m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor.  This is a square mxm matrix.  The remaining portion
   of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors.  Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows.  This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
   Proc0    0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
   -------------------------------------
           13  0 14  | 15 16 17  |  0  0
   Proc1    0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
   -------------------------------------
   Proc2   25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0, proc1, proc2 are 3, 3, 2 respectively.
   The 'n' parameters for proc0, proc1, proc2 are 3, 3, 2 respectively.
   The 'M','N' parameters are 8, 8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0, proc1, proc2 are
   the submatrices [A], [E], [I] respectively.  The OFF-DIAGONAL submatrices
   corresponding to proc0, proc1, proc2 are [BC], [DF], [GH] respectively.
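
   As a sketch only (the names comm, A, d_nnz and o_nnz are illustrative, and the
   per-row counts used here are the ones worked out below), proc1 of this example
   could create its share of the matrix with:

.vb
     int d_nnz[] = {3,3,2};   /* rows of the DIAGONAL block [E]      */
     int o_nnz[] = {2,1,1};   /* rows of the OFF-DIAGONAL block [DF] */
     ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve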
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz, o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc.  This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz, o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e., 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_LOCAL) {
    int        *allcolors,*colors,i;
    ISColoring ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr =
ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2396 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 2397 int *colors,i,*larray; 2398 ISColoring ocoloring; 2399 2400 /* set coloring for diagonal portion */ 2401 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2402 for (i=0; i<a->A->n; i++) { 2403 larray[i] = i + a->cstart; 2404 } 2405 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 2406 ierr = PetscMalloc((a->A->n+1)*sizeof(int),&colors);CHKERRQ(ierr); 2407 for (i=0; i<a->A->n; i++) { 2408 colors[i] = coloring->colors[larray[i]]; 2409 } 2410 ierr = PetscFree(larray);CHKERRQ(ierr); 2411 ierr = ISColoringCreate(MPI_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr); 2412 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 2413 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2414 2415 /* set coloring for off-diagonal portion */ 2416 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr); 2417 ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 2418 ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr); 2419 for (i=0; i<a->B->n; i++) { 2420 colors[i] = coloring->colors[larray[i]]; 2421 } 2422 ierr = PetscFree(larray);CHKERRQ(ierr); 2423 ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr); 2424 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 2425 ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr); 2426 } else { 2427 SETERRQ1(1,"No support ISColoringType %d",coloring->ctype); 2428 } 2429 2430 PetscFunctionReturn(0); 2431 } 2432 2433 #undef __FUNCT__ 2434 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 2435 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 2436 { 2437 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2438 int ierr; 2439 2440 PetscFunctionBegin; 2441 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 2442 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 2443 PetscFunctionReturn(0); 2444 } 2445 2446 #undef __FUNCT__ 2447 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 2448 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues) 2449 { 2450 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2451 int ierr; 2452 2453 PetscFunctionBegin; 2454 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 2455 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 2456 PetscFunctionReturn(0); 2457 } 2458
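
/*
   Illustrative usage sketch only (not part of the library source, hence kept
   inside #if 0): it shows the typical calling sequence for the routines defined
   above -- create a parallel AIJ matrix with rough preallocation, insert one
   value per locally owned row, and assemble.  The identifiers comm, m, n, M and
   N are assumed to be supplied by the caller; the preallocation counts 5 and 2
   are arbitrary placeholders.
*/
#if 0
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJUsageSketch"
static int MatMPIAIJUsageSketch(MPI_Comm comm,int m,int n,int M,int N)
{
  Mat    A;
  int    ierr,row,rstart,rend;
  Scalar one = 1.0;

  PetscFunctionBegin;
  /* roughly 5 nonzeros per row in the diagonal block, 2 in the off-diagonal block */
  ierr = MatCreateMPIAIJ(comm,m,n,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    /* place a 1.0 on the diagonal of every locally owned row */
    ierr = MatSetValues(A,1,&row,1,&row,&one,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDestroy(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif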