/*$Id: mpiaij.c,v 1.335 2001/04/21 21:13:28 bsmith Exp bsmith $*/

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
EXTERN int DisAssemble_MPIAIJ(Mat);
EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
EXTERN int MatPrintHelp_SeqAIJ(Mat);

/*
    Local utility routine that creates a mapping from the global column
    number to the local number in the off-diagonal part of the local
    storage of the matrix. When PETSC_USE_CTABLE is used this is scalable,
    at a slightly higher hash-table cost; without it, it is not scalable
    (each processor holds an order-N integer array), but access is fast.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        n = aij->B->n,i,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(Scalar))+(am+1)*sizeof(int); \
      ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j = (int*)(new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                         len*sizeof(Scalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = 
PetscFree(a->i);CHKERRQ(ierr); \ 95 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 96 } \ 97 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 98 a->singlemalloc = PETSC_TRUE; \ 99 \ 100 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 101 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 102 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 103 a->maxnz += CHUNKSIZE; \ 104 a->reallocs++; \ 105 } \ 106 N = nrow++ - 1; a->nz++; \ 107 /* shift up all the later entries in this row */ \ 108 for (ii=N; ii>=_i; ii--) { \ 109 rp[ii+1] = rp[ii]; \ 110 ap[ii+1] = ap[ii]; \ 111 } \ 112 rp[_i] = col1; \ 113 ap[_i] = value; \ 114 a_noinsert: ; \ 115 ailen[row] = nrow; \ 116 } 117 118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 119 { \ 120 \ 121 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 122 rmax = bimax[row]; nrow = bilen[row]; \ 123 col1 = col - shift; \ 124 \ 125 low = 0; high = nrow; \ 126 while (high-low > 5) { \ 127 t = (low+high)/2; \ 128 if (rp[t] > col) high = t; \ 129 else low = t; \ 130 } \ 131 for (_i=low; _i<high; _i++) { \ 132 if (rp[_i] > col1) break; \ 133 if (rp[_i] == col1) { \ 134 if (addv == ADD_VALUES) ap[_i] += value; \ 135 else ap[_i] = value; \ 136 goto b_noinsert; \ 137 } \ 138 } \ 139 if (nonew == 1) goto b_noinsert; \ 140 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \ 141 if (nrow >= rmax) { \ 142 /* there is no extra room in row, therefore enlarge */ \ 143 int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \ 144 Scalar *new_a; \ 145 \ 146 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \ 147 \ 148 /* malloc new storage space */ \ 149 len = new_nz*(sizeof(int)+sizeof(Scalar))+(bm+1)*sizeof(int); \ 150 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \ 151 new_j = (int*)(new_a + new_nz); \ 152 new_i = new_j + new_nz; \ 153 \ 154 /* copy over old data into new slots */ \ 155 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 156 for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 157 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 158 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 159 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 160 len*sizeof(int));CHKERRQ(ierr); \ 161 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \ 162 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 163 len*sizeof(Scalar));CHKERRQ(ierr); \ 164 /* free up old matrix storage */ \ 165 \ 166 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 167 if (!b->singlemalloc) { \ 168 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 169 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 170 } \ 171 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 172 b->singlemalloc = PETSC_TRUE; \ 173 \ 174 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 175 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 176 PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 177 b->maxnz += CHUNKSIZE; \ 178 b->reallocs++; \ 179 } \ 180 N = nrow++ - 1; b->nz++; \ 181 /* shift up all the later entries in this row */ \ 182 for (ii=N; ii>=_i; ii--) { \ 183 rp[ii+1] = rp[ii]; \ 184 ap[ii+1] = ap[ii]; \ 185 } \ 186 rp[_i] = col1; \ 187 ap[_i] = value; \ 188 b_noinsert: ; \ 189 bilen[row] = nrow; \ 190 } 191 192 #undef __FUNCT__ 193 #define __FUNCT__ "MatSetValues_MPIAIJ" 194 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 195 { 196 
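  /*
     Each locally owned row is split between the "diagonal" block aij->A
     (columns in [cstart,cend), stored with local column numbers) and the
     "off-diagonal" block aij->B (all other columns, renumbered through
     colmap/garray); entries for rows owned by other processors are stashed
     and communicated during MatAssemblyBegin()/MatAssemblyEnd(). A minimal
     calling sketch (the indices i,j and value v below are placeholders, not
     variables of this file):

         ierr = MatSetValues(mat,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr);
         ...
         ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
         ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  */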
Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 197 Scalar value; 198 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 199 int cstart = aij->cstart,cend = aij->cend,row,col; 200 PetscTruth roworiented = aij->roworiented; 201 202 /* Some Variables required in the macro */ 203 Mat A = aij->A; 204 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 205 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 206 Scalar *aa = a->a; 207 PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 208 Mat B = aij->B; 209 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 210 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m; 211 Scalar *ba = b->a; 212 213 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 214 int nonew = a->nonew,shift = a->indexshift; 215 Scalar *ap; 216 217 PetscFunctionBegin; 218 for (i=0; i<m; i++) { 219 if (im[i] < 0) continue; 220 #if defined(PETSC_USE_BOPT_g) 221 if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 222 #endif 223 if (im[i] >= rstart && im[i] < rend) { 224 row = im[i] - rstart; 225 for (j=0; j<n; j++) { 226 if (in[j] >= cstart && in[j] < cend){ 227 col = in[j] - cstart; 228 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 229 if (ignorezeroentries && value == 0.0) continue; 230 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 231 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 232 } else if (in[j] < 0) continue; 233 #if defined(PETSC_USE_BOPT_g) 234 else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");} 235 #endif 236 else { 237 if (mat->was_assembled) { 238 if (!aij->colmap) { 239 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 240 } 241 #if defined (PETSC_USE_CTABLE) 242 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 243 col--; 244 #else 245 col = aij->colmap[in[j]] - 1; 246 #endif 247 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 248 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 249 col = in[j]; 250 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 251 B = aij->B; 252 b = (Mat_SeqAIJ*)B->data; 253 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 254 ba = b->a; 255 } 256 } else col = in[j]; 257 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 258 if (ignorezeroentries && value == 0.0) continue; 259 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 260 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 261 } 262 } 263 } else { 264 if (!aij->donotstash) { 265 if (roworiented) { 266 if (ignorezeroentries && v[i*n] == 0.0) continue; 267 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 268 } else { 269 if (ignorezeroentries && v[i] == 0.0) continue; 270 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 271 } 272 } 273 } 274 } 275 PetscFunctionReturn(0); 276 } 277 278 #undef __FUNCT__ 279 #define __FUNCT__ "MatGetValues_MPIAIJ" 280 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 281 { 282 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 283 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 284 int cstart = aij->cstart,cend = aij->cend,row,col; 285 286 PetscFunctionBegin; 287 for (i=0; i<m; i++) { 288 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 289 if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 290 if (idxm[i] >= rstart && idxm[i] < rend) { 291 row = idxm[i] - rstart; 292 for (j=0; j<n; j++) { 
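        /*
           For each requested column: columns in [cstart,cend) are read
           directly from the diagonal block aij->A; all other columns are
           mapped to local indices of aij->B through colmap, and a zero is
           returned when the column does not occur in the off-diagonal part.
        */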
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr,nstash,reallocs;
  InsertMode addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        i,j,rstart,ncols,n,ierr,flg;
  int        *row,*col,other_disassembled;
  Scalar     *val;
  InsertMode addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so, we must
     also disassemble ourselves, in order that we may reassemble.
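     (Disassembling converts the compressed off-diagonal block back to global
     column numbering so that new off-process columns can be inserted before
     the matrix is assembled again.)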
*/ 382 /* 383 if nonzero structure of submatrix B cannot change then we know that 384 no processor disassembled thus we can skip this stuff 385 */ 386 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 387 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 388 if (mat->was_assembled && !other_disassembled) { 389 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 390 } 391 } 392 393 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 394 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 395 } 396 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 397 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 398 399 if (aij->rowvalues) { 400 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 401 aij->rowvalues = 0; 402 } 403 PetscFunctionReturn(0); 404 } 405 406 #undef __FUNCT__ 407 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 408 int MatZeroEntries_MPIAIJ(Mat A) 409 { 410 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 411 int ierr; 412 413 PetscFunctionBegin; 414 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 415 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 416 PetscFunctionReturn(0); 417 } 418 419 #undef __FUNCT__ 420 #define __FUNCT__ "MatZeroRows_MPIAIJ" 421 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 422 { 423 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 424 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 425 int *procs,*nprocs,j,idx,nsends,*work,row; 426 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 427 int *rvalues,tag = A->tag,count,base,slen,n,*source; 428 int *lens,imdex,*lrows,*values,rstart=l->rstart; 429 MPI_Comm comm = A->comm; 430 MPI_Request *send_waits,*recv_waits; 431 MPI_Status recv_status,*send_status; 432 IS istmp; 433 PetscTruth found; 434 435 PetscFunctionBegin; 436 ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr); 437 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 438 439 /* first count number of contributors to each processor */ 440 ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr); 441 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 442 procs = nprocs + size; 443 ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/ 444 for (i=0; i<N; i++) { 445 idx = rows[i]; 446 found = PETSC_FALSE; 447 for (j=0; j<size; j++) { 448 if (idx >= owners[j] && idx < owners[j+1]) { 449 nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break; 450 } 451 } 452 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 453 } 454 nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];} 455 456 /* inform other processors of number of messages and max length*/ 457 ierr = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr); 458 ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 459 nrecvs = work[size+rank]; 460 nmax = work[rank]; 461 ierr = PetscFree(work);CHKERRQ(ierr); 462 463 /* post receives: */ 464 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr); 465 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 466 for (i=0; i<nrecvs; i++) { 467 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 468 } 469 470 /* do sends: 471 1) starts[i] gives the starting index in svalues for stuff going to 472 the ith processor 473 */ 474 ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr); 475 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 476 ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr); 477 starts[0] = 0; 478 for (i=1; i<size; 
i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for (i=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory and trigger many mallocs, slowing things down.
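     (When the diagonal block is square, MatZeroRows() on l->A places the
     diagonal values itself; otherwise each diagonal entry is inserted with
     MatSetValues() and the matrix is reassembled afterwards.)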
534 535 Contributed by: Mathew Knepley 536 */ 537 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 538 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 539 if (diag && (l->A->M == l->A->N)) { 540 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 541 } else if (diag) { 542 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 543 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 544 SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 545 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 546 } 547 for (i = 0; i < slen; i++) { 548 row = lrows[i] + rstart; 549 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 550 } 551 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 552 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 553 } else { 554 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 555 } 556 ierr = ISDestroy(istmp);CHKERRQ(ierr); 557 ierr = PetscFree(lrows);CHKERRQ(ierr); 558 559 /* wait on sends */ 560 if (nsends) { 561 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 562 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 563 ierr = PetscFree(send_status);CHKERRQ(ierr); 564 } 565 ierr = PetscFree(send_waits);CHKERRQ(ierr); 566 ierr = PetscFree(svalues);CHKERRQ(ierr); 567 568 PetscFunctionReturn(0); 569 } 570 571 #undef __FUNCT__ 572 #define __FUNCT__ "MatMult_MPIAIJ" 573 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 574 { 575 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 576 int ierr,nt; 577 578 PetscFunctionBegin; 579 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 580 if (nt != A->n) { 581 SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt); 582 } 583 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 584 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 585 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 586 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 587 PetscFunctionReturn(0); 588 } 589 590 #undef __FUNCT__ 591 #define __FUNCT__ "MatMultAdd_MPIAIJ" 592 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 593 { 594 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 595 int ierr; 596 597 PetscFunctionBegin; 598 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 599 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 600 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 601 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 602 PetscFunctionReturn(0); 603 } 604 605 #undef __FUNCT__ 606 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 607 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 608 { 609 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 610 int ierr; 611 612 PetscFunctionBegin; 613 /* do nondiagonal part */ 614 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 615 /* send it on its way */ 616 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 617 /* do local part */ 618 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 619 /* receive remote parts: note this assumes the values are not actually */ 620 /* inserted in yy until the next line, which is true for my implementation*/ 621 /* but is not perhaps always true. 
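  /* (If a VecScatter implementation added its values into yy already during
     VecScatterBegin(), the local product computed above would overwrite them.)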
*/ 622 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 623 PetscFunctionReturn(0); 624 } 625 626 #undef __FUNCT__ 627 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 628 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 629 { 630 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 631 int ierr; 632 633 PetscFunctionBegin; 634 /* do nondiagonal part */ 635 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 636 /* send it on its way */ 637 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 638 /* do local part */ 639 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 640 /* receive remote parts: note this assumes the values are not actually */ 641 /* inserted in yy until the next line, which is true for my implementation*/ 642 /* but is not perhaps always true. */ 643 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 644 PetscFunctionReturn(0); 645 } 646 647 /* 648 This only works correctly for square matrices where the subblock A->A is the 649 diagonal block 650 */ 651 #undef __FUNCT__ 652 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 653 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 654 { 655 int ierr; 656 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 657 658 PetscFunctionBegin; 659 if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 660 if (a->rstart != a->cstart || a->rend != a->cend) { 661 SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 662 } 663 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 664 PetscFunctionReturn(0); 665 } 666 667 #undef __FUNCT__ 668 #define __FUNCT__ "MatScale_MPIAIJ" 669 int MatScale_MPIAIJ(Scalar *aa,Mat A) 670 { 671 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 672 int ierr; 673 674 PetscFunctionBegin; 675 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 676 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 677 PetscFunctionReturn(0); 678 } 679 680 #undef __FUNCT__ 681 #define __FUNCT__ "MatDestroy_MPIAIJ" 682 int MatDestroy_MPIAIJ(Mat mat) 683 { 684 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 685 int ierr; 686 687 PetscFunctionBegin; 688 #if defined(PETSC_USE_LOG) 689 PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N); 690 #endif 691 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 692 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 693 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 694 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 695 #if defined (PETSC_USE_CTABLE) 696 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 697 #else 698 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 699 #endif 700 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 701 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 702 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 703 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 704 ierr = PetscFree(aij);CHKERRQ(ierr); 705 PetscFunctionReturn(0); 706 } 707 708 #undef __FUNCT__ 709 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 710 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 711 { 712 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 713 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 714 int ierr,shift = C->indexshift,rank = aij->rank,size = aij->size; 715 PetscTruth isdraw,isascii,flg; 716 PetscViewer sviewer; 717 PetscViewerFormat format; 718 719 PetscFunctionBegin; 720 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 
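  /*
     For the ASCII info formats only per-processor nonzero statistics are
     printed (or nothing at all); for every other supported format, on more
     than one processor the whole matrix is first assembled onto processor 0
     below and viewed there through the singleton viewer.
  */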
721 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 722 if (isascii) { 723 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 724 if (format == PETSC_VIEWER_ASCII_INFO_LONG) { 725 MatInfo info; 726 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 727 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 728 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 729 if (flg) { 730 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 731 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 732 } else { 733 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 734 rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 735 } 736 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 737 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 738 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 739 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 740 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 741 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 742 PetscFunctionReturn(0); 743 } else if (format == PETSC_VIEWER_ASCII_INFO) { 744 PetscFunctionReturn(0); 745 } 746 } else if (isdraw) { 747 PetscDraw draw; 748 PetscTruth isnull; 749 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 750 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 751 } 752 753 if (size == 1) { 754 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 755 } else { 756 /* assemble the entire matrix onto first processor. 
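         Processor 0 creates an M-by-N MPIAIJ matrix holding all rows locally,
         the other processors contribute no rows, and processor 0 then views
         its (sequential) diagonal block through the singleton viewer.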
*/ 757 Mat A; 758 Mat_SeqAIJ *Aloc; 759 int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct; 760 Scalar *a; 761 762 if (!rank) { 763 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 764 } else { 765 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 766 } 767 PetscLogObjectParent(mat,A); 768 769 /* copy over the A part */ 770 Aloc = (Mat_SeqAIJ*)aij->A->data; 771 m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 772 row = aij->rstart; 773 for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;} 774 for (i=0; i<m; i++) { 775 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 776 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 777 } 778 aj = Aloc->j; 779 for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;} 780 781 /* copy over the B part */ 782 Aloc = (Mat_SeqAIJ*)aij->B->data; 783 m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 784 row = aij->rstart; 785 ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr); 786 ct = cols; 787 for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];} 788 for (i=0; i<m; i++) { 789 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 790 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 791 } 792 ierr = PetscFree(ct);CHKERRQ(ierr); 793 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 794 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 795 /* 796 Everyone has to call to draw the matrix since the graphics waits are 797 synchronized across all processors that share the PetscDraw object 798 */ 799 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 800 if (!rank) { 801 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 802 } 803 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 804 ierr = MatDestroy(A);CHKERRQ(ierr); 805 } 806 PetscFunctionReturn(0); 807 } 808 809 #undef __FUNCT__ 810 #define __FUNCT__ "MatView_MPIAIJ" 811 int MatView_MPIAIJ(Mat mat,PetscViewer viewer) 812 { 813 int ierr; 814 PetscTruth isascii,isdraw,issocket,isbinary; 815 816 PetscFunctionBegin; 817 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 818 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 819 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 820 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 821 if (isascii || isdraw || isbinary || issocket) { 822 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 823 } else { 824 SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 825 } 826 PetscFunctionReturn(0); 827 } 828 829 /* 830 This has to provide several versions. 831 832 2) a) use only local smoothing updating outer values only once. 833 b) local smoothing updating outer values each inner iteration 834 3) color updating out values betwen colors. 
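       Of these variants only the local sweeps are implemented below:
       SOR_LOCAL_SYMMETRIC_SWEEP, SOR_LOCAL_FORWARD_SWEEP and
       SOR_LOCAL_BACKWARD_SWEEP relax the local rows using ghost values
       gathered once per call; a true parallel SOR generates a
       not-supported error.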
835 */ 836 #undef __FUNCT__ 837 #define __FUNCT__ "MatRelax_MPIAIJ" 838 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,Vec xx) 839 { 840 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 841 Mat AA = mat->A,BB = mat->B; 842 Mat_SeqAIJ *A = (Mat_SeqAIJ*)AA->data,*B = (Mat_SeqAIJ *)BB->data; 843 Scalar *b,*x,*xs,*ls,d,*v,sum; 844 int ierr,*idx,*diag; 845 int n = matin->n,m = matin->m,i,shift = A->indexshift; 846 847 PetscFunctionBegin; 848 if (!A->diag) {ierr = MatMarkDiagonal_SeqAIJ(AA);CHKERRQ(ierr);} 849 diag = A->diag; 850 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 851 if (flag & SOR_ZERO_INITIAL_GUESS) { 852 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 853 PetscFunctionReturn(0); 854 } 855 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 856 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 857 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 858 if (xx != bb) { 859 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 860 } else { 861 b = x; 862 } 863 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 864 xs = x + shift; /* shift by one for index start of 1 */ 865 ls = ls + shift; 866 while (its--) { 867 /* go down through the rows */ 868 for (i=0; i<m; i++) { 869 n = A->i[i+1] - A->i[i]; 870 PetscLogFlops(4*n+3); 871 idx = A->j + A->i[i] + shift; 872 v = A->a + A->i[i] + shift; 873 sum = b[i]; 874 SPARSEDENSEMDOT(sum,xs,v,idx,n); 875 d = fshift + A->a[diag[i]+shift]; 876 n = B->i[i+1] - B->i[i]; 877 idx = B->j + B->i[i] + shift; 878 v = B->a + B->i[i] + shift; 879 SPARSEDENSEMDOT(sum,ls,v,idx,n); 880 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 881 } 882 /* come up through the rows */ 883 for (i=m-1; i>-1; i--) { 884 n = A->i[i+1] - A->i[i]; 885 PetscLogFlops(4*n+3); 886 idx = A->j + A->i[i] + shift; 887 v = A->a + A->i[i] + shift; 888 sum = b[i]; 889 SPARSEDENSEMDOT(sum,xs,v,idx,n); 890 d = fshift + A->a[diag[i]+shift]; 891 n = B->i[i+1] - B->i[i]; 892 idx = B->j + B->i[i] + shift; 893 v = B->a + B->i[i] + shift; 894 SPARSEDENSEMDOT(sum,ls,v,idx,n); 895 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 896 } 897 } 898 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 899 if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); } 900 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 901 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 902 if (flag & SOR_ZERO_INITIAL_GUESS) { 903 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 904 PetscFunctionReturn(0); 905 } 906 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 907 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 908 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 909 if (xx != bb) { 910 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 911 } else { 912 b = x; 913 } 914 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 915 xs = x + shift; /* shift by one for index start of 1 */ 916 ls = ls + shift; 917 while (its--) { 918 for (i=0; i<m; i++) { 919 n = A->i[i+1] - A->i[i]; 920 PetscLogFlops(4*n+3); 921 idx = A->j + A->i[i] + shift; 922 v = A->a + A->i[i] + shift; 923 sum = b[i]; 924 SPARSEDENSEMDOT(sum,xs,v,idx,n); 925 d = fshift + A->a[diag[i]+shift]; 926 n = B->i[i+1] - B->i[i]; 927 idx = B->j + B->i[i] + shift; 928 v = B->a + B->i[i] + shift; 929 SPARSEDENSEMDOT(sum,ls,v,idx,n); 930 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 931 } 932 } 933 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 934 if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); } 935 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 936 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 937 if (flag & SOR_ZERO_INITIAL_GUESS) { 938 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 939 PetscFunctionReturn(0); 940 } 941 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 942 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 943 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 944 if (xx != bb) { 945 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 946 } else { 947 b = x; 948 } 949 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 950 xs = x + shift; /* shift by one for index start of 1 */ 951 ls = ls + shift; 952 while (its--) { 953 for (i=m-1; i>-1; i--) { 954 n = A->i[i+1] - A->i[i]; 955 PetscLogFlops(4*n+3); 956 idx = A->j + A->i[i] + shift; 957 v = A->a + A->i[i] + shift; 958 sum = b[i]; 959 SPARSEDENSEMDOT(sum,xs,v,idx,n); 960 d = fshift + A->a[diag[i]+shift]; 961 n = B->i[i+1] - B->i[i]; 962 idx = B->j + B->i[i] + shift; 963 v = B->a + B->i[i] + shift; 964 SPARSEDENSEMDOT(sum,ls,v,idx,n); 965 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 966 } 967 } 968 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 969 if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); } 970 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 971 } else { 972 SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported"); 973 } 974 PetscFunctionReturn(0); 975 } 976 977 #undef __FUNCT__ 978 #define __FUNCT__ "MatGetInfo_MPIAIJ" 979 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 980 { 981 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 982 Mat A = mat->A,B = mat->B; 983 int ierr; 984 PetscReal isend[5],irecv[5]; 985 986 PetscFunctionBegin; 987 info->block_size = 1.0; 988 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 989 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 990 isend[3] = info->memory; isend[4] = info->mallocs; 991 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 992 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 993 isend[3] += info->memory; isend[4] += info->mallocs; 994 if (flag == MAT_LOCAL) { 995 info->nz_used = isend[0]; 996 info->nz_allocated = isend[1]; 997 info->nz_unneeded = isend[2]; 998 info->memory = isend[3]; 999 info->mallocs = isend[4]; 1000 } else if (flag == MAT_GLOBAL_MAX) { 1001 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1002 info->nz_used = irecv[0]; 1003 info->nz_allocated = irecv[1]; 1004 info->nz_unneeded = irecv[2]; 1005 info->memory = irecv[3]; 1006 info->mallocs = irecv[4]; 1007 } else if (flag == MAT_GLOBAL_SUM) { 1008 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1009 info->nz_used = irecv[0]; 1010 info->nz_allocated = irecv[1]; 1011 info->nz_unneeded = irecv[2]; 1012 info->memory = irecv[3]; 1013 info->mallocs = irecv[4]; 1014 } 1015 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1016 info->fill_ratio_needed = 0; 1017 info->factor_mallocs = 0; 1018 info->rows_global = (double)matin->M; 1019 info->columns_global = (double)matin->N; 1020 info->rows_local = (double)matin->m; 1021 info->columns_local = (double)matin->N; 1022 1023 PetscFunctionReturn(0); 1024 } 1025 1026 #undef 
__FUNCT__ 1027 #define __FUNCT__ "MatSetOption_MPIAIJ" 1028 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1029 { 1030 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1031 int ierr; 1032 1033 PetscFunctionBegin; 1034 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1035 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1036 op == MAT_COLUMNS_UNSORTED || 1037 op == MAT_COLUMNS_SORTED || 1038 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1039 op == MAT_KEEP_ZEROED_ROWS || 1040 op == MAT_NEW_NONZERO_LOCATION_ERR || 1041 op == MAT_USE_INODES || 1042 op == MAT_DO_NOT_USE_INODES || 1043 op == MAT_IGNORE_ZERO_ENTRIES) { 1044 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1045 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1046 } else if (op == MAT_ROW_ORIENTED) { 1047 a->roworiented = PETSC_TRUE; 1048 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1049 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1050 } else if (op == MAT_ROWS_SORTED || 1051 op == MAT_ROWS_UNSORTED || 1052 op == MAT_YES_NEW_DIAGONALS) { 1053 PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1054 } else if (op == MAT_COLUMN_ORIENTED) { 1055 a->roworiented = PETSC_FALSE; 1056 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1057 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1058 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1059 a->donotstash = PETSC_TRUE; 1060 } else if (op == MAT_NO_NEW_DIAGONALS){ 1061 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 1062 } else { 1063 SETERRQ(PETSC_ERR_SUP,"unknown option"); 1064 } 1065 PetscFunctionReturn(0); 1066 } 1067 1068 #undef __FUNCT__ 1069 #define __FUNCT__ "MatGetOwnershipRange_MPIAIJ" 1070 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1071 { 1072 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1073 1074 PetscFunctionBegin; 1075 if (m) *m = mat->rstart; 1076 if (n) *n = mat->rend; 1077 PetscFunctionReturn(0); 1078 } 1079 1080 #undef __FUNCT__ 1081 #define __FUNCT__ "MatGetRow_MPIAIJ" 1082 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1083 { 1084 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1085 Scalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1086 int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1087 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1088 int *cmap,*idx_p; 1089 1090 PetscFunctionBegin; 1091 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1092 mat->getrowactive = PETSC_TRUE; 1093 1094 if (!mat->rowvalues && (idx || v)) { 1095 /* 1096 allocate enough space to hold information from the longest row. 
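       The worst case is the sum of the diagonal-block and off-diagonal-block
       row lengths, so a single buffer of that size (values followed by
       column indices) can hold any merged row.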
1097 */ 1098 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1099 int max = 1,tmp; 1100 for (i=0; i<matin->m; i++) { 1101 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1102 if (max < tmp) { max = tmp; } 1103 } 1104 ierr = PetscMalloc(max*(sizeof(int)+sizeof(Scalar)),&mat->rowvalues);CHKERRQ(ierr); 1105 mat->rowindices = (int*)(mat->rowvalues + max); 1106 } 1107 1108 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows") 1109 lrow = row - rstart; 1110 1111 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1112 if (!v) {pvA = 0; pvB = 0;} 1113 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1114 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1115 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1116 nztot = nzA + nzB; 1117 1118 cmap = mat->garray; 1119 if (v || idx) { 1120 if (nztot) { 1121 /* Sort by increasing column numbers, assuming A and B already sorted */ 1122 int imark = -1; 1123 if (v) { 1124 *v = v_p = mat->rowvalues; 1125 for (i=0; i<nzB; i++) { 1126 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1127 else break; 1128 } 1129 imark = i; 1130 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1131 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1132 } 1133 if (idx) { 1134 *idx = idx_p = mat->rowindices; 1135 if (imark > -1) { 1136 for (i=0; i<imark; i++) { 1137 idx_p[i] = cmap[cworkB[i]]; 1138 } 1139 } else { 1140 for (i=0; i<nzB; i++) { 1141 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1142 else break; 1143 } 1144 imark = i; 1145 } 1146 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1147 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1148 } 1149 } else { 1150 if (idx) *idx = 0; 1151 if (v) *v = 0; 1152 } 1153 } 1154 *nz = nztot; 1155 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1156 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1157 PetscFunctionReturn(0); 1158 } 1159 1160 #undef __FUNCT__ 1161 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1162 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1163 { 1164 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1165 1166 PetscFunctionBegin; 1167 if (aij->getrowactive == PETSC_FALSE) { 1168 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1169 } 1170 aij->getrowactive = PETSC_FALSE; 1171 PetscFunctionReturn(0); 1172 } 1173 1174 #undef __FUNCT__ 1175 #define __FUNCT__ "MatNorm_MPIAIJ" 1176 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1177 { 1178 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1179 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1180 int ierr,i,j,cstart = aij->cstart,shift = amat->indexshift; 1181 PetscReal sum = 0.0; 1182 Scalar *v; 1183 1184 PetscFunctionBegin; 1185 if (aij->size == 1) { 1186 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1187 } else { 1188 if (type == NORM_FROBENIUS) { 1189 v = amat->a; 1190 for (i=0; i<amat->nz; i++) { 1191 #if defined(PETSC_USE_COMPLEX) 1192 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1193 #else 1194 sum += (*v)*(*v); v++; 1195 #endif 1196 } 1197 v = bmat->a; 1198 for (i=0; i<bmat->nz; i++) { 1199 #if defined(PETSC_USE_COMPLEX) 1200 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1201 #else 1202 sum += (*v)*(*v); v++; 1203 #endif 1204 } 1205 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1206 *norm = sqrt(*norm); 1207 } else if (type == NORM_1) { /* max column norm */ 1208 PetscReal *tmp,*tmp2; 
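      /*
         Each process accumulates |a_ij| for every global column of its
         diagonal and off-diagonal blocks, the per-column sums are added
         across processes with MPI_Allreduce(), and the largest sum is the
         1-norm.
      */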
1209 int *jj,*garray = aij->garray; 1210 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1211 ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1212 ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr); 1213 *norm = 0.0; 1214 v = amat->a; jj = amat->j; 1215 for (j=0; j<amat->nz; j++) { 1216 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1217 } 1218 v = bmat->a; jj = bmat->j; 1219 for (j=0; j<bmat->nz; j++) { 1220 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1221 } 1222 ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1223 for (j=0; j<mat->N; j++) { 1224 if (tmp2[j] > *norm) *norm = tmp2[j]; 1225 } 1226 ierr = PetscFree(tmp);CHKERRQ(ierr); 1227 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1228 } else if (type == NORM_INFINITY) { /* max row norm */ 1229 PetscReal ntemp = 0.0; 1230 for (j=0; j<aij->A->m; j++) { 1231 v = amat->a + amat->i[j] + shift; 1232 sum = 0.0; 1233 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1234 sum += PetscAbsScalar(*v); v++; 1235 } 1236 v = bmat->a + bmat->i[j] + shift; 1237 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1238 sum += PetscAbsScalar(*v); v++; 1239 } 1240 if (sum > ntemp) ntemp = sum; 1241 } 1242 ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr); 1243 } else { 1244 SETERRQ(PETSC_ERR_SUP,"No support for two norm"); 1245 } 1246 } 1247 PetscFunctionReturn(0); 1248 } 1249 1250 #undef __FUNCT__ 1251 #define __FUNCT__ "MatTranspose_MPIAIJ" 1252 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1253 { 1254 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1255 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1256 int ierr,shift = Aloc->indexshift; 1257 int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct; 1258 Mat B; 1259 Scalar *array; 1260 1261 PetscFunctionBegin; 1262 if (!matout && M != N) { 1263 SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1264 } 1265 1266 ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1267 1268 /* copy over the A part */ 1269 Aloc = (Mat_SeqAIJ*)a->A->data; 1270 m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1271 row = a->rstart; 1272 for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;} 1273 for (i=0; i<m; i++) { 1274 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1275 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1276 } 1277 aj = Aloc->j; 1278 for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;} 1279 1280 /* copy over the B part */ 1281 Aloc = (Mat_SeqAIJ*)a->B->data; 1282 m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1283 row = a->rstart; 1284 ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr); 1285 ct = cols; 1286 for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];} 1287 for (i=0; i<m; i++) { 1288 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1289 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1290 } 1291 ierr = PetscFree(ct);CHKERRQ(ierr); 1292 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1293 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1294 if (matout) { 1295 *matout = B; 1296 } else { 1297 ierr = MatHeaderCopy(A,B);CHKERRQ(ierr); 1298 } 1299 PetscFunctionReturn(0); 1300 } 1301 1302 #undef __FUNCT__ 1303 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 1304 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1305 { 1306 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1307 Mat a = aij->A,b = aij->B; 1308 int 
ierr,s1,s2,s3; 1309 1310 PetscFunctionBegin; 1311 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1312 if (rr) { 1313 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1314 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 1315 /* Overlap communication with computation. */ 1316 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1317 } 1318 if (ll) { 1319 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1320 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 1321 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1322 } 1323 /* scale the diagonal block */ 1324 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1325 1326 if (rr) { 1327 /* Do a scatter end and then right scale the off-diagonal block */ 1328 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1329 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1330 } 1331 1332 PetscFunctionReturn(0); 1333 } 1334 1335 1336 #undef __FUNCT__ 1337 #define __FUNCT__ "MatPrintHelp_MPIAIJ" 1338 int MatPrintHelp_MPIAIJ(Mat A) 1339 { 1340 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1341 int ierr; 1342 1343 PetscFunctionBegin; 1344 if (!a->rank) { 1345 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1346 } 1347 PetscFunctionReturn(0); 1348 } 1349 1350 #undef __FUNCT__ 1351 #define __FUNCT__ "MatGetBlockSize_MPIAIJ" 1352 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1353 { 1354 PetscFunctionBegin; 1355 *bs = 1; 1356 PetscFunctionReturn(0); 1357 } 1358 #undef __FUNCT__ 1359 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 1360 int MatSetUnfactored_MPIAIJ(Mat A) 1361 { 1362 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1363 int ierr; 1364 1365 PetscFunctionBegin; 1366 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1367 PetscFunctionReturn(0); 1368 } 1369 1370 #undef __FUNCT__ 1371 #define __FUNCT__ "MatEqual_MPIAIJ" 1372 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1373 { 1374 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1375 Mat a,b,c,d; 1376 PetscTruth flg; 1377 int ierr; 1378 1379 PetscFunctionBegin; 1380 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1381 if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type"); 1382 a = matA->A; b = matA->B; 1383 c = matB->A; d = matB->B; 1384 1385 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1386 if (flg == PETSC_TRUE) { 1387 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1388 } 1389 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1390 PetscFunctionReturn(0); 1391 } 1392 1393 #undef __FUNCT__ 1394 #define __FUNCT__ "MatCopy_MPIAIJ" 1395 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1396 { 1397 int ierr; 1398 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1399 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1400 PetscTruth flg; 1401 1402 PetscFunctionBegin; 1403 ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr); 1404 if (str != SAME_NONZERO_PATTERN || !flg) { 1405 /* because of the column compression in the off-processor part of the matrix a->B, 1406 the number of columns in a->B and b->B may be different, hence we cannot call 1407 the MatCopy() directly on the two parts. 
If need be, we can provide a more 1408 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1409 then copying the submatrices */ 1410 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1411 } else { 1412 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1413 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1414 } 1415 PetscFunctionReturn(0); 1416 } 1417 1418 #undef __FUNCT__ 1419 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ" 1420 int MatSetUpPreallocation_MPIAIJ(Mat A) 1421 { 1422 int ierr; 1423 1424 PetscFunctionBegin; 1425 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1426 PetscFunctionReturn(0); 1427 } 1428 1429 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1430 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int); 1431 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1432 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **); 1433 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *); 1434 #if !defined(PETSC_USE_COMPLEX) 1435 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*); 1436 #endif 1437 1438 /* -------------------------------------------------------------------*/ 1439 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1440 MatGetRow_MPIAIJ, 1441 MatRestoreRow_MPIAIJ, 1442 MatMult_MPIAIJ, 1443 MatMultAdd_MPIAIJ, 1444 MatMultTranspose_MPIAIJ, 1445 MatMultTransposeAdd_MPIAIJ, 1446 0, 1447 0, 1448 0, 1449 0, 1450 0, 1451 0, 1452 MatRelax_MPIAIJ, 1453 MatTranspose_MPIAIJ, 1454 MatGetInfo_MPIAIJ, 1455 MatEqual_MPIAIJ, 1456 MatGetDiagonal_MPIAIJ, 1457 MatDiagonalScale_MPIAIJ, 1458 MatNorm_MPIAIJ, 1459 MatAssemblyBegin_MPIAIJ, 1460 MatAssemblyEnd_MPIAIJ, 1461 0, 1462 MatSetOption_MPIAIJ, 1463 MatZeroEntries_MPIAIJ, 1464 MatZeroRows_MPIAIJ, 1465 #if !defined(PETSC_USE_COMPLEX) 1466 MatLUFactorSymbolic_MPIAIJ_TFS, 1467 #else 1468 0, 1469 #endif 1470 0, 1471 0, 1472 0, 1473 MatSetUpPreallocation_MPIAIJ, 1474 0, 1475 MatGetOwnershipRange_MPIAIJ, 1476 0, 1477 0, 1478 0, 1479 0, 1480 MatDuplicate_MPIAIJ, 1481 0, 1482 0, 1483 0, 1484 0, 1485 0, 1486 MatGetSubMatrices_MPIAIJ, 1487 MatIncreaseOverlap_MPIAIJ, 1488 MatGetValues_MPIAIJ, 1489 MatCopy_MPIAIJ, 1490 MatPrintHelp_MPIAIJ, 1491 MatScale_MPIAIJ, 1492 0, 1493 0, 1494 0, 1495 MatGetBlockSize_MPIAIJ, 1496 0, 1497 0, 1498 0, 1499 0, 1500 MatFDColoringCreate_MPIAIJ, 1501 0, 1502 MatSetUnfactored_MPIAIJ, 1503 0, 1504 0, 1505 MatGetSubMatrix_MPIAIJ, 1506 MatDestroy_MPIAIJ, 1507 MatView_MPIAIJ, 1508 MatGetMaps_Petsc, 1509 0, 1510 0, 1511 0, 1512 0, 1513 0, 1514 0, 1515 0, 1516 0, 1517 MatSetColoring_MPIAIJ, 1518 MatSetValuesAdic_MPIAIJ, 1519 MatSetValuesAdifor_MPIAIJ 1520 }; 1521 1522 /* ----------------------------------------------------------------------------------------*/ 1523 1524 EXTERN_C_BEGIN 1525 #undef __FUNCT__ 1526 #define __FUNCT__ "MatStoreValues_MPIAIJ" 1527 int MatStoreValues_MPIAIJ(Mat mat) 1528 { 1529 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1530 int ierr; 1531 1532 PetscFunctionBegin; 1533 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1534 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1535 PetscFunctionReturn(0); 1536 } 1537 EXTERN_C_END 1538 1539 EXTERN_C_BEGIN 1540 #undef __FUNCT__ 1541 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 1542 int MatRetrieveValues_MPIAIJ(Mat mat) 1543 { 1544 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1545 int ierr; 1546 1547 PetscFunctionBegin; 1548 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1549 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1550 
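  /*
     MatStoreValues()/MatRetrieveValues() save and restore only the numerical
     values; they are intended for matrices whose nonzero pattern stays fixed
     (typically with MAT_NO_NEW_NONZERO_LOCATIONS set). A minimal usage
     sketch (mat stands for any assembled MPIAIJ matrix, not a variable of
     this file):

         ierr = MatSetOption(mat,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr);
         ierr = MatStoreValues(mat);CHKERRQ(ierr);
         ...   change values, e.g. MatZeroEntries()/MatSetValues()
         ierr = MatRetrieveValues(mat);CHKERRQ(ierr);
  */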
PetscFunctionReturn(0); 1551 } 1552 EXTERN_C_END 1553 1554 #include "petscpc.h" 1555 EXTERN_C_BEGIN 1556 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *); 1557 EXTERN_C_END 1558 1559 EXTERN_C_BEGIN 1560 #undef __FUNCT__ 1561 #define __FUNCT__ "MatCreate_MPIAIJ" 1562 int MatCreate_MPIAIJ(Mat B) 1563 { 1564 Mat_MPIAIJ *b; 1565 int ierr,i,size; 1566 1567 PetscFunctionBegin; 1568 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1569 1570 ierr = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr); 1571 B->data = (void*)b; 1572 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1573 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1574 B->factor = 0; 1575 B->assembled = PETSC_FALSE; 1576 B->mapping = 0; 1577 1578 B->insertmode = NOT_SET_VALUES; 1579 b->size = size; 1580 ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr); 1581 1582 ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr); 1583 ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr); 1584 1585 /* the information in the maps duplicates the information computed below, eventually 1586 we should remove the duplicate information that is not contained in the maps */ 1587 ierr = MapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1588 ierr = MapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1589 1590 /* build local table of row and column ownerships */ 1591 ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr); 1592 PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1593 b->cowners = b->rowners + b->size + 2; 1594 ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1595 b->rowners[0] = 0; 1596 for (i=2; i<=b->size; i++) { 1597 b->rowners[i] += b->rowners[i-1]; 1598 } 1599 b->rstart = b->rowners[b->rank]; 1600 b->rend = b->rowners[b->rank+1]; 1601 ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr); 1602 b->cowners[0] = 0; 1603 for (i=2; i<=b->size; i++) { 1604 b->cowners[i] += b->cowners[i-1]; 1605 } 1606 b->cstart = b->cowners[b->rank]; 1607 b->cend = b->cowners[b->rank+1]; 1608 1609 /* build cache for off array entries formed */ 1610 ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 1611 b->donotstash = PETSC_FALSE; 1612 b->colmap = 0; 1613 b->garray = 0; 1614 b->roworiented = PETSC_TRUE; 1615 1616 /* stuff used for matrix vector multiply */ 1617 b->lvec = PETSC_NULL; 1618 b->Mvctx = PETSC_NULL; 1619 1620 /* stuff for MatGetRow() */ 1621 b->rowindices = 0; 1622 b->rowvalues = 0; 1623 b->getrowactive = PETSC_FALSE; 1624 1625 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1626 "MatStoreValues_MPIAIJ", 1627 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1628 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1629 "MatRetrieveValues_MPIAIJ", 1630 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1631 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1632 "MatGetDiagonalBlock_MPIAIJ", 1633 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1634 PetscFunctionReturn(0); 1635 } 1636 EXTERN_C_END 1637 1638 #undef __FUNCT__ 1639 #define __FUNCT__ "MatDuplicate_MPIAIJ" 1640 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1641 { 1642 Mat mat; 1643 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1644 int ierr; 1645 1646 PetscFunctionBegin; 1647 *newmat = 0; 1648 ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr); 1649 
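  /*
     The duplicate receives the same parallel layout and communication data
     as the original: the ownership ranges, colmap, garray, the ghost vector
     lvec and its scatter context are copied, and the two sequential blocks
     A and B are duplicated with the requested copy-values option.
  */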
  ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
  a = (Mat_MPIAIJ*)mat->data;
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = matin->factor;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    int len;
    len  = oldmat->B->n;
    ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,len*sizeof(int));
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->Mvctx);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->B);
  ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat        A;
  Scalar     *vals,*svals;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        i,nz,ierr,j,rstart,rend,fd;
  int        header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int        *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int        tag = ((PetscObject)viewer)->tag,cend,cstart,n;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]; j<rowners[i+1]; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(Scalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in values for the other processors and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(Scalar),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix: first a local SeqAIJ matrix,
  then the final result obtained by concatenating the local matrices.
    Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
{
  int        ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  int        *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend;
  Mat        *local,M,Mreuse;
  Scalar     *vwork,*aa;
  MPI_Comm   comm = mat->comm;
  Mat_SeqAIJ *aij;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      nlocal = n/size + ((n % size) > rank);
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ(1,"Local column sizes do not add up to total number of columns");
    }

    /* next, compute all the lengths */
    ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    int ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
  ii = aij->i;
  jj = aij->j;
  aa = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates storage for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square m x m matrix. The remaining portion
   of the local submatrix (m x N) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices respectively.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.
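
   As an illustrative sketch (added here for clarity; not part of the original manual page),
   proc1 in the example above could pass its per-row counts directly.  When d_nnz/o_nnz are
   given, the scalar d_nz/o_nz arguments are ignored, so zeros may be passed for them:
.vb
      int d_nnz[3] = {3,3,2},o_nnz[3] = {2,1,1};
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve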

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
{
  Mat_MPIAIJ *b;
  int        ierr,i;
  PetscTruth flg2;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
  if (!flg2) PetscFunctionReturn(0);
  B->preallocated = PETSC_TRUE;
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
  if (d_nnz) {
    for (i=0; i<B->m; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->m; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
    }
  }
  b = (Mat_MPIAIJ*)B->data;

  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->A);
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->B);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm  - MPI communicator
.  m     - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n     - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax. For square matrices n is
           almost always m.
.  M     - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N     - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero
           structure.
           The size of this array is equal to the number
           of local rows, i.e., 'm'.

   Output Parameter:
.  A - the matrix

   Notes:
   The m,n,M,N parameters specify the size of the matrix and its partitioning across
   processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square m x m matrix. The remaining portion
   of the local submatrix (m x N) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices respectively.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_LOCAL) {
    int        *allcolors,*colors,i;
    ISColoring ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    int        *colors,i,*larray;
    ISColoring ocoloring;

    /* set coloring for diagonal portion */
    ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      larray[i] = i + a->cstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"No support for ISColoringType %d",coloring->ctype);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
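
/*
   A minimal usage sketch (added for illustration only; not part of the original source):
   creating a small distributed AIJ matrix with MatCreateMPIAIJ(), inserting one value per
   local row, and assembling it.  The global size 8 and the d_nz/o_nz values of 2 below are
   arbitrary example choices, not values taken from this file.

.vb
      Mat    A;
      int    ierr,i,rstart,rend;
      Scalar one = 1.0;

      ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,8,8,2,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for (i=rstart; i<rend; i++) {
        ierr = MatSetValues(A,1,&i,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr);
      }
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatDestroy(A);CHKERRQ(ierr);
.ve
*/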