1 /*$Id: mpiaij.c,v 1.308 1999/11/24 21:53:50 bsmith Exp bsmith $*/ 2 3 #include "src/mat/impls/aij/mpi/mpiaij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/spops.h" 6 7 extern int MatSetUpMultiply_MPIAIJ(Mat); 8 extern int DisAssemble_MPIAIJ(Mat); 9 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 10 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 11 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 12 extern int MatPrintHelp_SeqAIJ(Mat); 13 14 /* 15 Local utility routine that creates a mapping from the global column 16 number to the local number in the off-diagonal part of the local 17 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 18 a slightly higher hash table cost; without it it is not scalable (each processor 19 has an order N integer array but is fast to acess. 20 */ 21 #undef __FUNC__ 22 #define __FUNC__ "CreateColmap_MPIAIJ_Private" 23 int CreateColmap_MPIAIJ_Private(Mat mat) 24 { 25 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 26 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 27 int n = B->n,i,ierr; 28 29 PetscFunctionBegin; 30 #if defined (PETSC_USE_CTABLE) 31 ierr = PetscTableCreate(aij->n/5,&aij->colmap);CHKERRQ(ierr); 32 for (i=0; i<n; i++){ 33 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 34 } 35 #else 36 aij->colmap = (int*)PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap); 37 PLogObjectMemory(mat,aij->N*sizeof(int)); 38 ierr = PetscMemzero(aij->colmap,aij->N*sizeof(int));CHKERRQ(ierr); 39 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 40 #endif 41 PetscFunctionReturn(0); 42 } 43 44 #define CHUNKSIZE 15 45 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 46 { \ 47 \ 48 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 49 rmax = aimax[row]; nrow = ailen[row]; \ 50 col1 = col - shift; \ 51 \ 52 low = 0; high = nrow; \ 53 while (high-low > 5) { \ 54 t = (low+high)/2; \ 55 if (rp[t] > col) high = t; \ 56 else low = t; \ 57 } \ 58 for (_i=0; _i<nrow; _i++) { \ 59 if (rp[_i] > col1) break; \ 60 if (rp[_i] == col1) { \ 61 if (addv == ADD_VALUES) ap[_i] += value; \ 62 else ap[_i] = value; \ 63 goto a_noinsert; \ 64 } \ 65 } \ 66 if (nonew == 1) goto a_noinsert; \ 67 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 68 if (nrow >= rmax) { \ 69 /* there is no extra room in row, therefore enlarge */ \ 70 int new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \ 71 Scalar *new_a; \ 72 \ 73 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 74 \ 75 /* malloc new storage space */ \ 76 len = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \ 77 new_a = (Scalar*)PetscMalloc(len);CHKPTRQ(new_a); \ 78 new_j = (int*)(new_a + new_nz); \ 79 new_i = new_j + new_nz; \ 80 \ 81 /* copy over old data into new slots */ \ 82 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \ 83 for (ii=row+1; ii<a->m+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 84 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 85 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 86 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 87 len*sizeof(int));CHKERRQ(ierr); \ 88 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \ 89 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 90 len*sizeof(Scalar));CHKERRQ(ierr); \ 91 /* free up old matrix storage */ \ 92 \ 93 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 94 if (!a->singlemalloc) { \ 95 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 96 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 97 } \ 98 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 99 a->singlemalloc = PETSC_TRUE; \ 100 \ 101 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 102 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 103 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 104 a->maxnz += CHUNKSIZE; \ 105 a->reallocs++; \ 106 } \ 107 N = nrow++ - 1; a->nz++; \ 108 /* shift up all the later entries in this row */ \ 109 for (ii=N; ii>=_i; ii--) { \ 110 rp[ii+1] = rp[ii]; \ 111 ap[ii+1] = ap[ii]; \ 112 } \ 113 rp[_i] = col1; \ 114 ap[_i] = value; \ 115 a_noinsert: ; \ 116 ailen[row] = nrow; \ 117 } 118 119 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 120 { \ 121 \ 122 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 123 rmax = bimax[row]; nrow = bilen[row]; \ 124 col1 = col - shift; \ 125 \ 126 low = 0; high = nrow; \ 127 while (high-low > 5) { \ 128 t = (low+high)/2; \ 129 if (rp[t] > col) high = t; \ 130 else low = t; \ 131 } \ 132 for (_i=0; _i<nrow; _i++) { \ 133 if (rp[_i] > col1) break; \ 134 if (rp[_i] == col1) { \ 135 if (addv == ADD_VALUES) ap[_i] += value; \ 136 else ap[_i] = value; \ 137 goto b_noinsert; \ 138 } \ 139 } \ 140 if (nonew == 1) goto b_noinsert; \ 141 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 142 if (nrow >= rmax) { \ 143 /* there is no extra room in row, therefore enlarge */ \ 144 int new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \ 145 Scalar *new_a; \ 146 \ 147 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 148 \ 149 /* malloc new storage space */ \ 150 len = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \ 151 new_a = (Scalar*)PetscMalloc(len);CHKPTRQ(new_a); \ 152 new_j = (int*)(new_a + new_nz); \ 153 new_i = new_j + new_nz; \ 154 \ 155 /* copy over old data into new slots */ \ 156 for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \ 157 for (ii=row+1; ii<b->m+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 158 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 159 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 160 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 161 len*sizeof(int));CHKERRQ(ierr); \ 162 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \ 163 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 164 len*sizeof(Scalar));CHKERRQ(ierr); \ 165 /* free up old matrix storage */ \ 166 \ 167 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 168 if (!b->singlemalloc) { \ 169 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 170 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 171 } \ 172 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 173 b->singlemalloc = PETSC_TRUE; \ 174 \ 175 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 176 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 177 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 178 b->maxnz += CHUNKSIZE; \ 179 b->reallocs++; \ 180 } \ 181 N = nrow++ - 1; b->nz++; \ 182 /* shift up all the later entries in this row */ \ 183 for (ii=N; ii>=_i; ii--) { \ 184 rp[ii+1] = rp[ii]; \ 185 ap[ii+1] = ap[ii]; \ 186 } \ 187 rp[_i] = col1; \ 188 ap[_i] = value; \ 189 b_noinsert: ; \ 190 bilen[row] = nrow; \ 191 } 192 193 #undef __FUNC__ 194 #define __FUNC__ "MatSetValues_MPIAIJ" 195 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 196 { 197 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 198 Scalar value; 199 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 200 int cstart = aij->cstart,cend = aij->cend,row,col; 201 int roworiented = aij->roworiented; 202 203 /* Some Variables required in the macro */ 204 Mat A = aij->A; 205 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 206 int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 207 Scalar *aa = a->a; 208 PetscTruth ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE:PETSC_FALSE); 209 Mat B = aij->B; 210 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 211 int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j; 212 Scalar *ba = b->a; 213 214 int *rp,ii,nrow,_i,rmax,N,col1,low,high,t; 215 int nonew = a->nonew,shift = a->indexshift; 216 Scalar *ap; 217 218 PetscFunctionBegin; 219 for (i=0; i<m; i++) { 220 if (im[i] < 0) continue; 221 #if defined(PETSC_USE_BOPT_g) 222 if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 223 #endif 224 if (im[i] >= rstart && im[i] < rend) { 225 row = im[i] - rstart; 226 for (j=0; j<n; j++) { 227 if (in[j] >= cstart && in[j] < cend){ 228 col = in[j] - cstart; 229 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 230 if (ignorezeroentries && value == 0.0) continue; 231 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 232 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 233 } 234 else if (in[j] < 0) continue; 235 #if defined(PETSC_USE_BOPT_g) 236 else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");} 237 #endif 238 else { 239 if (mat->was_assembled) { 240 if (!aij->colmap) { 241 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 242 } 243 #if defined (PETSC_USE_CTABLE) 244 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 245 col--; 246 #else 247 col = aij->colmap[in[j]] - 1; 248 #endif 249 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 250 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 251 col = in[j]; 252 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 253 B = aij->B; 254 b = (Mat_SeqAIJ*)B->data; 255 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 256 ba = b->a; 257 } 258 } else col = in[j]; 259 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 260 if (ignorezeroentries && value == 0.0) continue; 261 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 262 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 263 } 264 } 265 } else { 266 if (!aij->donotstash) { 267 if (roworiented) { 268 if (ignorezeroentries && v[i*n] == 0.0) continue; 269 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 270 } else { 271 if (ignorezeroentries && v[i] == 0.0) continue; 272 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 273 } 274 } 275 } 276 } 277 PetscFunctionReturn(0); 278 } 279 280 #undef __FUNC__ 281 #define __FUNC__ "MatGetValues_MPIAIJ" 282 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 283 { 284 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 285 int ierr,i,j,rstart = aij->rstart,rend = aij->rend; 286 int cstart = aij->cstart,cend = aij->cend,row,col; 287 288 PetscFunctionBegin; 289 for (i=0; i<m; i++) { 290 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 291 if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 292 if (idxm[i] >= rstart && idxm[i] < rend) { 293 row = idxm[i] - rstart; 294 for (j=0; j<n; j++) { 295 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 296 if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 297 if (idxn[j] >= cstart && idxn[j] < cend){ 298 col = idxn[j] - cstart; 299 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 300 } else { 301 if (!aij->colmap) { 302 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 303 } 304 #if defined (PETSC_USE_CTABLE) 305 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 306 col --; 307 #else 308 col = aij->colmap[idxn[j]] - 1; 309 #endif 310 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 311 else { 312 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 313 } 314 } 315 } 316 } else { 317 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 318 } 319 } 320 PetscFunctionReturn(0); 321 } 322 323 #undef __FUNC__ 324 #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 325 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 326 { 327 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 328 int ierr,nstash,reallocs; 329 InsertMode addv; 330 331 PetscFunctionBegin; 332 if (aij->donotstash) { 333 PetscFunctionReturn(0); 334 } 335 336 /* make sure all processors are either in INSERTMODE or ADDMODE */ 337 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 338 if (addv == (ADD_VALUES|INSERT_VALUES)) { 339 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 340 } 341 mat->insertmode = addv; /* in case this processor had no cache */ 342 343 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 344 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 345 PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 346 PetscFunctionReturn(0); 347 } 348 349 350 #undef __FUNC__ 351 #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 352 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 353 { 354 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 355 int i,j,rstart,ncols,n,ierr,flg; 356 int *row,*col,other_disassembled; 357 Scalar *val; 358 InsertMode addv = mat->insertmode; 359 360 PetscFunctionBegin; 361 if (!aij->donotstash) { 362 while (1) { 363 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 364 if (!flg) break; 365 366 for (i=0; i<n;) { 367 /* Now identify the consecutive vals belonging to the same row */ 368 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 369 if (j < n) ncols = j-i; 370 else ncols = n-i; 371 /* Now assemble all these values with a single function call */ 372 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 373 i = j; 374 } 375 } 376 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 377 } 378 379 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 380 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 381 382 /* determine if any processor has disassembled, if so we must 383 also disassemble ourselfs, in order that we may reassemble. */ 384 /* 385 if nonzero structure of submatrix B cannot change then we know that 386 no processor disassembled thus we can skip this stuff 387 */ 388 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 389 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 390 if (mat->was_assembled && !other_disassembled) { 391 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 392 } 393 } 394 395 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 396 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 397 } 398 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 399 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 400 401 if (aij->rowvalues) { 402 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 403 aij->rowvalues = 0; 404 } 405 PetscFunctionReturn(0); 406 } 407 408 #undef __FUNC__ 409 #define __FUNC__ "MatZeroEntries_MPIAIJ" 410 int MatZeroEntries_MPIAIJ(Mat A) 411 { 412 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 413 int ierr; 414 415 PetscFunctionBegin; 416 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 417 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 418 PetscFunctionReturn(0); 419 } 420 421 #undef __FUNC__ 422 #define __FUNC__ "MatZeroRows_MPIAIJ" 423 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 424 { 425 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 426 int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 427 int *procs,*nprocs,j,found,idx,nsends,*work,row; 428 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 429 int *rvalues,tag = A->tag,count,base,slen,n,*source; 430 int *lens,imdex,*lrows,*values,rstart=l->rstart; 431 MPI_Comm comm = A->comm; 432 MPI_Request *send_waits,*recv_waits; 433 MPI_Status recv_status,*send_status; 434 IS istmp; 435 436 PetscFunctionBegin; 437 ierr = ISGetSize(is,&N);CHKERRQ(ierr); 438 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 439 440 /* first count number of contributors to each processor */ 441 nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs); 442 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 443 procs = nprocs + size; 444 owner = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/ 445 for (i=0; i<N; i++) { 446 idx = rows[i]; 447 found = 0; 448 for (j=0; j<size; j++) { 449 if (idx >= owners[j] && idx < owners[j+1]) { 450 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 451 } 452 } 453 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 454 } 455 nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];} 456 457 /* inform other processors of number of messages and max length*/ 458 work = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work); 459 ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 460 nrecvs = work[size+rank]; 461 nmax = work[rank]; 462 ierr = PetscFree(work);CHKERRQ(ierr); 463 464 /* post receives: */ 465 rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 466 recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 467 for (i=0; i<nrecvs; i++) { 468 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 469 } 470 471 /* do sends: 472 1) starts[i] gives the starting index in svalues for stuff going to 473 the ith processor 474 */ 475 svalues = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues); 476 send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 477 starts = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts); 478 starts[0] = 0; 479 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 480 for (i=0; i<N; i++) { 481 svalues[starts[owner[i]]++] = rows[i]; 482 } 483 ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 484 485 starts[0] = 0; 486 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 487 count = 0; 488 for (i=0; i<size; i++) { 489 if (procs[i]) { 490 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 491 } 492 } 493 ierr = PetscFree(starts);CHKERRQ(ierr); 494 495 base = owners[rank]; 496 497 /* wait on receives */ 498 lens = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens); 499 source = lens + nrecvs; 500 count = nrecvs; slen = 0; 501 while (count) { 502 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 503 /* unpack receives into our local space */ 504 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 505 source[imdex] = recv_status.MPI_SOURCE; 506 lens[imdex] = n; 507 slen += n; 508 count--; 509 } 510 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 511 512 /* move the data into the send scatter */ 513 lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows); 514 count = 0; 515 for (i=0; i<nrecvs; i++) { 516 values = rvalues + i*nmax; 517 for (j=0; j<lens[i]; j++) { 518 lrows[count++] = values[j] - base; 519 } 520 } 521 ierr = PetscFree(rvalues);CHKERRQ(ierr); 522 ierr = PetscFree(lens);CHKERRQ(ierr); 523 ierr = PetscFree(owner);CHKERRQ(ierr); 524 ierr = PetscFree(nprocs);CHKERRQ(ierr); 525 526 /* actually zap the local rows */ 527 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 528 PLogObjectParent(A,istmp); 529 530 /* 531 Zero the required rows. If the "diagonal block" of the matrix 532 is square and the user wishes to set the diagonal we use seperate 533 code so that MatSetValues() is not called for each diagonal allocating 534 new memory, thus calling lots of mallocs and slowing things down. 535 536 Contributed by: Mathew Knepley 537 */ 538 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 539 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 540 if (diag && (l->A->M == l->A->N)) { 541 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 542 } else if (diag) { 543 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 544 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 545 SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 546 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 547 } 548 for (i = 0; i < slen; i++) { 549 row = lrows[i] + rstart; 550 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 551 } 552 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 553 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 554 } else { 555 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 556 } 557 ierr = ISDestroy(istmp);CHKERRQ(ierr); 558 ierr = PetscFree(lrows);CHKERRQ(ierr); 559 560 /* wait on sends */ 561 if (nsends) { 562 send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 563 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 564 ierr = PetscFree(send_status);CHKERRQ(ierr); 565 } 566 ierr = PetscFree(send_waits);CHKERRQ(ierr); 567 ierr = PetscFree(svalues);CHKERRQ(ierr); 568 569 PetscFunctionReturn(0); 570 } 571 572 #undef __FUNC__ 573 #define __FUNC__ "MatMult_MPIAIJ" 574 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 575 { 576 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 577 int ierr,nt; 578 579 PetscFunctionBegin; 580 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 581 if (nt != a->n) { 582 SETERRQ2(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A (%d) and xx (%d)",a->n,nt); 583 } 584 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 585 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 586 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 587 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 588 PetscFunctionReturn(0); 589 } 590 591 #undef __FUNC__ 592 #define __FUNC__ "MatMultAdd_MPIAIJ" 593 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 594 { 595 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 596 int ierr; 597 598 PetscFunctionBegin; 599 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 600 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 601 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 602 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 603 PetscFunctionReturn(0); 604 } 605 606 #undef __FUNC__ 607 #define __FUNC__ "MatMultTranspose_MPIAIJ" 608 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 609 { 610 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 611 int ierr; 612 613 PetscFunctionBegin; 614 /* do nondiagonal part */ 615 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 616 /* send it on its way */ 617 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 618 /* do local part */ 619 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 620 /* receive remote parts: note this assumes the values are not actually */ 621 /* inserted in yy until the next line, which is true for my implementation*/ 622 /* but is not perhaps always true. */ 623 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 624 PetscFunctionReturn(0); 625 } 626 627 #undef __FUNC__ 628 #define __FUNC__ "MatMultTransposeAdd_MPIAIJ" 629 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 630 { 631 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 632 int ierr; 633 634 PetscFunctionBegin; 635 /* do nondiagonal part */ 636 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 637 /* send it on its way */ 638 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 639 /* do local part */ 640 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 641 /* receive remote parts: note this assumes the values are not actually */ 642 /* inserted in yy until the next line, which is true for my implementation*/ 643 /* but is not perhaps always true. */ 644 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 645 PetscFunctionReturn(0); 646 } 647 648 /* 649 This only works correctly for square matrices where the subblock A->A is the 650 diagonal block 651 */ 652 #undef __FUNC__ 653 #define __FUNC__ "MatGetDiagonal_MPIAIJ" 654 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 655 { 656 int ierr; 657 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 658 659 PetscFunctionBegin; 660 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 661 if (a->rstart != a->cstart || a->rend != a->cend) { 662 SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition"); 663 } 664 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 665 PetscFunctionReturn(0); 666 } 667 668 #undef __FUNC__ 669 #define __FUNC__ "MatScale_MPIAIJ" 670 int MatScale_MPIAIJ(Scalar *aa,Mat A) 671 { 672 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 673 int ierr; 674 675 PetscFunctionBegin; 676 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 677 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 678 PetscFunctionReturn(0); 679 } 680 681 #undef __FUNC__ 682 #define __FUNC__ "MatDestroy_MPIAIJ" 683 int MatDestroy_MPIAIJ(Mat mat) 684 { 685 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 686 int ierr; 687 688 PetscFunctionBegin; 689 690 if (mat->mapping) { 691 ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr); 692 } 693 if (mat->bmapping) { 694 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr); 695 } 696 if (mat->rmap) { 697 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 698 } 699 if (mat->cmap) { 700 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 701 } 702 #if defined(PETSC_USE_LOG) 703 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N); 704 #endif 705 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 706 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 707 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 708 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 709 #if defined (PETSC_USE_CTABLE) 710 if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);} 711 #else 712 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 713 #endif 714 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 715 if (aij->lvec) {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);} 716 if (aij->Mvctx) {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);} 717 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 718 ierr = PetscFree(aij);CHKERRQ(ierr); 719 PLogObjectDestroy(mat); 720 PetscHeaderDestroy(mat); 721 PetscFunctionReturn(0); 722 } 723 724 #undef __FUNC__ 725 #define __FUNC__ "MatView_MPIAIJ_Binary" 726 int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 727 { 728 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 729 int ierr; 730 731 PetscFunctionBegin; 732 if (aij->size == 1) { 733 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 734 } 735 else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 736 PetscFunctionReturn(0); 737 } 738 739 #undef __FUNC__ 740 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworSocket" 741 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer) 742 { 743 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 744 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 745 int ierr,format,shift = C->indexshift,rank = aij->rank,size = aij->size; 746 PetscTruth isdraw,isascii,flg; 747 748 PetscFunctionBegin; 749 ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 750 ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 751 if (isascii) { 752 ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 753 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 754 MatInfo info; 755 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 756 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 757 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 758 if (flg) { 759 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 760 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 761 } else { 762 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 763 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr); 764 } 765 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 766 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 767 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 768 ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr); 769 ierr = ViewerFlush(viewer);CHKERRQ(ierr); 770 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 771 PetscFunctionReturn(0); 772 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 773 PetscFunctionReturn(0); 774 } 775 } else if (isdraw) { 776 Draw draw; 777 PetscTruth isnull; 778 ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 779 ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 780 } 781 782 if (size == 1) { 783 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 784 } else { 785 /* assemble the entire matrix onto first processor. */ 786 Mat A; 787 Mat_SeqAIJ *Aloc; 788 int M = aij->M,N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 789 Scalar *a; 790 791 if (!rank) { 792 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 793 } else { 794 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 795 } 796 PLogObjectParent(mat,A); 797 798 /* copy over the A part */ 799 Aloc = (Mat_SeqAIJ*)aij->A->data; 800 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 801 row = aij->rstart; 802 for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;} 803 for (i=0; i<m; i++) { 804 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 805 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 806 } 807 aj = Aloc->j; 808 for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;} 809 810 /* copy over the B part */ 811 Aloc = (Mat_SeqAIJ*)aij->B->data; 812 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 813 row = aij->rstart; 814 ct = cols = (int*)PetscMalloc((ai[m]+1)*sizeof(int));CHKPTRQ(cols); 815 for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];} 816 for (i=0; i<m; i++) { 817 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 818 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 819 } 820 ierr = PetscFree(ct);CHKERRQ(ierr); 821 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 822 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 823 /* 824 Everyone has to call to draw the matrix since the graphics waits are 825 synchronized across all processors that share the Draw object 826 */ 827 if (!rank) { 828 Viewer sviewer; 829 ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 830 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 831 ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 832 } 833 ierr = MatDestroy(A);CHKERRQ(ierr); 834 } 835 PetscFunctionReturn(0); 836 } 837 838 #undef __FUNC__ 839 #define __FUNC__ "MatView_MPIAIJ" 840 int MatView_MPIAIJ(Mat mat,Viewer viewer) 841 { 842 int ierr; 843 PetscTruth isascii,isdraw,issocket,isbinary; 844 845 PetscFunctionBegin; 846 ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 847 ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 848 ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr); 849 ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr); 850 if (isascii || isdraw || isbinary || issocket) { 851 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 852 } else { 853 SETERRQ1(1,1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 854 } 855 PetscFunctionReturn(0); 856 } 857 858 /* 859 This has to provide several versions. 860 861 2) a) use only local smoothing updating outer values only once. 862 b) local smoothing updating outer values each inner iteration 863 3) color updating out values betwen colors. 864 */ 865 #undef __FUNC__ 866 #define __FUNC__ "MatRelax_MPIAIJ" 867 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag, 868 PetscReal fshift,int its,Vec xx) 869 { 870 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 871 Mat AA = mat->A,BB = mat->B; 872 Mat_SeqAIJ *A = (Mat_SeqAIJ*)AA->data,*B = (Mat_SeqAIJ *)BB->data; 873 Scalar *b,*x,*xs,*ls,d,*v,sum; 874 int ierr,*idx,*diag; 875 int n = mat->n,m = mat->m,i,shift = A->indexshift; 876 877 PetscFunctionBegin; 878 if (!A->diag) {ierr = MatMarkDiagonal_SeqAIJ(AA);CHKERRQ(ierr);} 879 diag = A->diag; 880 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 881 if (flag & SOR_ZERO_INITIAL_GUESS) { 882 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 883 PetscFunctionReturn(0); 884 } 885 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 886 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 887 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 888 if (xx != bb) { 889 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 890 } else { 891 b = x; 892 } 893 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 894 xs = x + shift; /* shift by one for index start of 1 */ 895 ls = ls + shift; 896 while (its--) { 897 /* go down through the rows */ 898 for (i=0; i<m; i++) { 899 n = A->i[i+1] - A->i[i]; 900 PLogFlops(4*n+3); 901 idx = A->j + A->i[i] + shift; 902 v = A->a + A->i[i] + shift; 903 sum = b[i]; 904 SPARSEDENSEMDOT(sum,xs,v,idx,n); 905 d = fshift + A->a[diag[i]+shift]; 906 n = B->i[i+1] - B->i[i]; 907 idx = B->j + B->i[i] + shift; 908 v = B->a + B->i[i] + shift; 909 SPARSEDENSEMDOT(sum,ls,v,idx,n); 910 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 911 } 912 /* come up through the rows */ 913 for (i=m-1; i>-1; i--) { 914 n = A->i[i+1] - A->i[i]; 915 PLogFlops(4*n+3) 916 idx = A->j + A->i[i] + shift; 917 v = A->a + A->i[i] + shift; 918 sum = b[i]; 919 SPARSEDENSEMDOT(sum,xs,v,idx,n); 920 d = fshift + A->a[diag[i]+shift]; 921 n = B->i[i+1] - B->i[i]; 922 idx = B->j + B->i[i] + shift; 923 v = B->a + B->i[i] + shift; 924 SPARSEDENSEMDOT(sum,ls,v,idx,n); 925 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 926 } 927 } 928 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 929 if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); } 930 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 931 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 932 if (flag & SOR_ZERO_INITIAL_GUESS) { 933 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 934 PetscFunctionReturn(0); 935 } 936 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 937 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 938 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 939 if (xx != bb) { 940 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 941 } else { 942 b = x; 943 } 944 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 945 xs = x + shift; /* shift by one for index start of 1 */ 946 ls = ls + shift; 947 while (its--) { 948 for (i=0; i<m; i++) { 949 n = A->i[i+1] - A->i[i]; 950 PLogFlops(4*n+3); 951 idx = A->j + A->i[i] + shift; 952 v = A->a + A->i[i] + shift; 953 sum = b[i]; 954 SPARSEDENSEMDOT(sum,xs,v,idx,n); 955 d = fshift + A->a[diag[i]+shift]; 956 n = B->i[i+1] - B->i[i]; 957 idx = B->j + B->i[i] + shift; 958 v = B->a + B->i[i] + shift; 959 SPARSEDENSEMDOT(sum,ls,v,idx,n); 960 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 961 } 962 } 963 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 964 if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); } 965 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 966 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 967 if (flag & SOR_ZERO_INITIAL_GUESS) { 968 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 969 PetscFunctionReturn(0); 970 } 971 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 972 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 973 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 974 if (xx != bb) { 975 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 976 } else { 977 b = x; 978 } 979 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 980 xs = x + shift; /* shift by one for index start of 1 */ 981 ls = ls + shift; 982 while (its--) { 983 for (i=m-1; i>-1; i--) { 984 n = A->i[i+1] - A->i[i]; 985 PLogFlops(4*n+3); 986 idx = A->j + A->i[i] + shift; 987 v = A->a + A->i[i] + shift; 988 sum = b[i]; 989 SPARSEDENSEMDOT(sum,xs,v,idx,n); 990 d = fshift + A->a[diag[i]+shift]; 991 n = B->i[i+1] - B->i[i]; 992 idx = B->j + B->i[i] + shift; 993 v = B->a + B->i[i] + shift; 994 SPARSEDENSEMDOT(sum,ls,v,idx,n); 995 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 996 } 997 } 998 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 999 if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); } 1000 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 1001 } else { 1002 SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported"); 1003 } 1004 PetscFunctionReturn(0); 1005 } 1006 1007 #undef __FUNC__ 1008 #define __FUNC__ "MatGetInfo_MPIAIJ" 1009 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1010 { 1011 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1012 Mat A = mat->A,B = mat->B; 1013 int ierr; 1014 PetscReal isend[5],irecv[5]; 1015 1016 PetscFunctionBegin; 1017 info->block_size = 1.0; 1018 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1019 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1020 isend[3] = info->memory; isend[4] = info->mallocs; 1021 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1022 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1023 isend[3] += info->memory; isend[4] += info->mallocs; 1024 if (flag == MAT_LOCAL) { 1025 info->nz_used = isend[0]; 1026 info->nz_allocated = isend[1]; 1027 info->nz_unneeded = isend[2]; 1028 info->memory = isend[3]; 1029 info->mallocs = isend[4]; 1030 } else if (flag == MAT_GLOBAL_MAX) { 1031 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1032 info->nz_used = irecv[0]; 1033 info->nz_allocated = irecv[1]; 1034 info->nz_unneeded = irecv[2]; 1035 info->memory = irecv[3]; 1036 info->mallocs = irecv[4]; 1037 } else if (flag == MAT_GLOBAL_SUM) { 1038 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1039 info->nz_used = irecv[0]; 1040 info->nz_allocated = irecv[1]; 1041 info->nz_unneeded = irecv[2]; 1042 info->memory = irecv[3]; 1043 info->mallocs = irecv[4]; 1044 } 1045 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1046 info->fill_ratio_needed = 0; 1047 info->factor_mallocs = 0; 1048 info->rows_global = (double)mat->M; 1049 info->columns_global = (double)mat->N; 1050 info->rows_local = (double)mat->m; 1051 info->columns_local = (double)mat->N; 1052 1053 PetscFunctionReturn(0); 1054 } 1055 1056 #undef __FUNC__ 1057 #define __FUNC__ "MatSetOption_MPIAIJ" 1058 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1059 { 1060 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1061 int ierr; 1062 1063 PetscFunctionBegin; 1064 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1065 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1066 op == MAT_COLUMNS_UNSORTED || 1067 op == MAT_COLUMNS_SORTED || 1068 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1069 op == MAT_KEEP_ZEROED_ROWS || 1070 op == MAT_NEW_NONZERO_LOCATION_ERR || 1071 op == MAT_IGNORE_ZERO_ENTRIES) { 1072 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1073 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1074 } else if (op == MAT_ROW_ORIENTED) { 1075 a->roworiented = PETSC_TRUE; 1076 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1077 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1078 } else if (op == MAT_ROWS_SORTED || 1079 op == MAT_ROWS_UNSORTED || 1080 op == MAT_SYMMETRIC || 1081 op == MAT_STRUCTURALLY_SYMMETRIC || 1082 op == MAT_YES_NEW_DIAGONALS) { 1083 PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1084 } else if (op == MAT_COLUMN_ORIENTED) { 1085 a->roworiented = PETSC_FALSE; 1086 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1087 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1088 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1089 a->donotstash = PETSC_TRUE; 1090 } else if (op == MAT_NO_NEW_DIAGONALS){ 1091 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1092 } else { 1093 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1094 } 1095 PetscFunctionReturn(0); 1096 } 1097 1098 #undef __FUNC__ 1099 #define __FUNC__ "MatGetSize_MPIAIJ" 1100 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1101 { 1102 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1103 1104 PetscFunctionBegin; 1105 if (m) *m = mat->M; 1106 if (n) *n = mat->N; 1107 PetscFunctionReturn(0); 1108 } 1109 1110 #undef __FUNC__ 1111 #define __FUNC__ "MatGetLocalSize_MPIAIJ" 1112 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1113 { 1114 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1115 1116 PetscFunctionBegin; 1117 if (m) *m = mat->m; 1118 if (n) *n = mat->n; 1119 PetscFunctionReturn(0); 1120 } 1121 1122 #undef __FUNC__ 1123 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" 1124 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1125 { 1126 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1127 1128 PetscFunctionBegin; 1129 *m = mat->rstart; *n = mat->rend; 1130 PetscFunctionReturn(0); 1131 } 1132 1133 #undef __FUNC__ 1134 #define __FUNC__ "MatGetRow_MPIAIJ" 1135 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1136 { 1137 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1138 Scalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1139 int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart; 1140 int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend; 1141 int *cmap,*idx_p; 1142 1143 PetscFunctionBegin; 1144 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1145 mat->getrowactive = PETSC_TRUE; 1146 1147 if (!mat->rowvalues && (idx || v)) { 1148 /* 1149 allocate enough space to hold information from the longest row. 1150 */ 1151 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1152 int max = 1,m = mat->m,tmp; 1153 for (i=0; i<m; i++) { 1154 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1155 if (max < tmp) { max = tmp; } 1156 } 1157 mat->rowvalues = (Scalar*)PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 1158 mat->rowindices = (int*)(mat->rowvalues + max); 1159 } 1160 1161 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows") 1162 lrow = row - rstart; 1163 1164 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1165 if (!v) {pvA = 0; pvB = 0;} 1166 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1167 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1168 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1169 nztot = nzA + nzB; 1170 1171 cmap = mat->garray; 1172 if (v || idx) { 1173 if (nztot) { 1174 /* Sort by increasing column numbers, assuming A and B already sorted */ 1175 int imark = -1; 1176 if (v) { 1177 *v = v_p = mat->rowvalues; 1178 for (i=0; i<nzB; i++) { 1179 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1180 else break; 1181 } 1182 imark = i; 1183 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1184 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1185 } 1186 if (idx) { 1187 *idx = idx_p = mat->rowindices; 1188 if (imark > -1) { 1189 for (i=0; i<imark; i++) { 1190 idx_p[i] = cmap[cworkB[i]]; 1191 } 1192 } else { 1193 for (i=0; i<nzB; i++) { 1194 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1195 else break; 1196 } 1197 imark = i; 1198 } 1199 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1200 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1201 } 1202 } else { 1203 if (idx) *idx = 0; 1204 if (v) *v = 0; 1205 } 1206 } 1207 *nz = nztot; 1208 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1209 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1210 PetscFunctionReturn(0); 1211 } 1212 1213 #undef __FUNC__ 1214 #define __FUNC__ "MatRestoreRow_MPIAIJ" 1215 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1216 { 1217 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1218 1219 PetscFunctionBegin; 1220 if (aij->getrowactive == PETSC_FALSE) { 1221 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1222 } 1223 aij->getrowactive = PETSC_FALSE; 1224 PetscFunctionReturn(0); 1225 } 1226 1227 #undef __FUNC__ 1228 #define __FUNC__ "MatNorm_MPIAIJ" 1229 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1230 { 1231 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1232 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1233 int ierr,i,j,cstart = aij->cstart,shift = amat->indexshift; 1234 PetscReal sum = 0.0; 1235 Scalar *v; 1236 1237 PetscFunctionBegin; 1238 if (aij->size == 1) { 1239 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1240 } else { 1241 if (type == NORM_FROBENIUS) { 1242 v = amat->a; 1243 for (i=0; i<amat->nz; i++) { 1244 #if defined(PETSC_USE_COMPLEX) 1245 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1246 #else 1247 sum += (*v)*(*v); v++; 1248 #endif 1249 } 1250 v = bmat->a; 1251 for (i=0; i<bmat->nz; i++) { 1252 #if defined(PETSC_USE_COMPLEX) 1253 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1254 #else 1255 sum += (*v)*(*v); v++; 1256 #endif 1257 } 1258 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1259 *norm = sqrt(*norm); 1260 } else if (type == NORM_1) { /* max column norm */ 1261 PetscReal *tmp,*tmp2; 1262 int *jj,*garray = aij->garray; 1263 tmp = (PetscReal*)PetscMalloc((aij->N+1)*sizeof(PetscReal));CHKPTRQ(tmp); 1264 tmp2 = (PetscReal*)PetscMalloc((aij->N+1)*sizeof(PetscReal));CHKPTRQ(tmp2); 1265 ierr = PetscMemzero(tmp,aij->N*sizeof(PetscReal));CHKERRQ(ierr); 1266 *norm = 0.0; 1267 v = amat->a; jj = amat->j; 1268 for (j=0; j<amat->nz; j++) { 1269 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1270 } 1271 v = bmat->a; jj = bmat->j; 1272 for (j=0; j<bmat->nz; j++) { 1273 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1274 } 1275 ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1276 for (j=0; j<aij->N; j++) { 1277 if (tmp2[j] > *norm) *norm = tmp2[j]; 1278 } 1279 ierr = PetscFree(tmp);CHKERRQ(ierr); 1280 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1281 } else if (type == NORM_INFINITY) { /* max row norm */ 1282 PetscReal ntemp = 0.0; 1283 for (j=0; j<amat->m; j++) { 1284 v = amat->a + amat->i[j] + shift; 1285 sum = 0.0; 1286 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1287 sum += PetscAbsScalar(*v); v++; 1288 } 1289 v = bmat->a + bmat->i[j] + shift; 1290 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1291 sum += PetscAbsScalar(*v); v++; 1292 } 1293 if (sum > ntemp) ntemp = sum; 1294 } 1295 ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr); 1296 } else { 1297 SETERRQ(PETSC_ERR_SUP,0,"No support for two norm"); 1298 } 1299 } 1300 PetscFunctionReturn(0); 1301 } 1302 1303 #undef __FUNC__ 1304 #define __FUNC__ "MatTranspose_MPIAIJ" 1305 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1306 { 1307 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1308 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data; 1309 int ierr,shift = Aloc->indexshift; 1310 int M = a->M,N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1311 Mat B; 1312 Scalar *array; 1313 1314 PetscFunctionBegin; 1315 if (!matout && M != N) { 1316 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1317 } 1318 1319 ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1320 1321 /* copy over the A part */ 1322 Aloc = (Mat_SeqAIJ*)a->A->data; 1323 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1324 row = a->rstart; 1325 for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;} 1326 for (i=0; i<m; i++) { 1327 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1328 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1329 } 1330 aj = Aloc->j; 1331 for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;} 1332 1333 /* copy over the B part */ 1334 Aloc = (Mat_SeqAIJ*)a->B->data; 1335 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1336 row = a->rstart; 1337 ct = cols = (int*)PetscMalloc((1+ai[m]-shift)*sizeof(int));CHKPTRQ(cols); 1338 for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];} 1339 for (i=0; i<m; i++) { 1340 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1341 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1342 } 1343 ierr = PetscFree(ct);CHKERRQ(ierr); 1344 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1345 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1346 if (matout) { 1347 *matout = B; 1348 } else { 1349 PetscOps *Abops; 1350 struct _MatOps *Aops; 1351 1352 /* This isn't really an in-place transpose .... but free data structures from a */ 1353 ierr = PetscFree(a->rowners);CHKERRQ(ierr); 1354 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1355 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1356 #if defined (PETSC_USE_CTABLE) 1357 if (a->colmap) {ierr = PetscTableDelete(a->colmap);CHKERRQ(ierr);} 1358 #else 1359 if (a->colmap) {ierr = PetscFree(a->colmap);CHKERRQ(ierr);} 1360 #endif 1361 if (a->garray) {ierr = PetscFree(a->garray);CHKERRQ(ierr);} 1362 if (a->lvec) {ierr = VecDestroy(a->lvec);CHKERRQ(ierr);} 1363 if (a->Mvctx) {ierr = VecScatterDestroy(a->Mvctx);CHKERRQ(ierr);} 1364 ierr = PetscFree(a);CHKERRQ(ierr); 1365 1366 /* 1367 This is horrible, horrible code. We need to keep the 1368 A pointers for the bops and ops but copy everything 1369 else from C. 1370 */ 1371 Abops = A->bops; 1372 Aops = A->ops; 1373 ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr); 1374 A->bops = Abops; 1375 A->ops = Aops; 1376 PetscHeaderDestroy(B); 1377 } 1378 PetscFunctionReturn(0); 1379 } 1380 1381 #undef __FUNC__ 1382 #define __FUNC__ "MatDiagonalScale_MPIAIJ" 1383 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1384 { 1385 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1386 Mat a = aij->A,b = aij->B; 1387 int ierr,s1,s2,s3; 1388 1389 PetscFunctionBegin; 1390 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1391 if (rr) { 1392 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1393 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 1394 /* Overlap communication with computation. */ 1395 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1396 } 1397 if (ll) { 1398 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1399 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1400 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1401 } 1402 /* scale the diagonal block */ 1403 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1404 1405 if (rr) { 1406 /* Do a scatter end and then right scale the off-diagonal block */ 1407 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1408 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1409 } 1410 1411 PetscFunctionReturn(0); 1412 } 1413 1414 1415 #undef __FUNC__ 1416 #define __FUNC__ "MatPrintHelp_MPIAIJ" 1417 int MatPrintHelp_MPIAIJ(Mat A) 1418 { 1419 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1420 int ierr; 1421 1422 PetscFunctionBegin; 1423 if (!a->rank) { 1424 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1425 } 1426 PetscFunctionReturn(0); 1427 } 1428 1429 #undef __FUNC__ 1430 #define __FUNC__ "MatGetBlockSize_MPIAIJ" 1431 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1432 { 1433 PetscFunctionBegin; 1434 *bs = 1; 1435 PetscFunctionReturn(0); 1436 } 1437 #undef __FUNC__ 1438 #define __FUNC__ "MatSetUnfactored_MPIAIJ" 1439 int MatSetUnfactored_MPIAIJ(Mat A) 1440 { 1441 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1442 int ierr; 1443 1444 PetscFunctionBegin; 1445 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1446 PetscFunctionReturn(0); 1447 } 1448 1449 #undef __FUNC__ 1450 #define __FUNC__ "MatEqual_MPIAIJ" 1451 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag) 1452 { 1453 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 1454 Mat a,b,c,d; 1455 PetscTruth flg; 1456 int ierr; 1457 1458 PetscFunctionBegin; 1459 if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1460 a = matA->A; b = matA->B; 1461 c = matB->A; d = matB->B; 1462 1463 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1464 if (flg == PETSC_TRUE) { 1465 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1466 } 1467 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1468 PetscFunctionReturn(0); 1469 } 1470 1471 #undef __FUNC__ 1472 #define __FUNC__ "MatCopy_MPIAIJ" 1473 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1474 { 1475 int ierr; 1476 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1477 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1478 1479 PetscFunctionBegin; 1480 if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) { 1481 /* because of the column compression in the off-processor part of the matrix a->B, 1482 the number of columns in a->B and b->B may be different, hence we cannot call 1483 the MatCopy() directly on the two parts. If need be, we can provide a more 1484 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1485 then copying the submatrices */ 1486 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1487 } else { 1488 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1489 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1490 } 1491 PetscFunctionReturn(0); 1492 } 1493 1494 extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1495 extern int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int); 1496 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1497 extern int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **); 1498 extern int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *); 1499 1500 /* -------------------------------------------------------------------*/ 1501 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1502 MatGetRow_MPIAIJ, 1503 MatRestoreRow_MPIAIJ, 1504 MatMult_MPIAIJ, 1505 MatMultAdd_MPIAIJ, 1506 MatMultTranspose_MPIAIJ, 1507 MatMultTransposeAdd_MPIAIJ, 1508 0, 1509 0, 1510 0, 1511 0, 1512 0, 1513 0, 1514 MatRelax_MPIAIJ, 1515 MatTranspose_MPIAIJ, 1516 MatGetInfo_MPIAIJ, 1517 MatEqual_MPIAIJ, 1518 MatGetDiagonal_MPIAIJ, 1519 MatDiagonalScale_MPIAIJ, 1520 MatNorm_MPIAIJ, 1521 MatAssemblyBegin_MPIAIJ, 1522 MatAssemblyEnd_MPIAIJ, 1523 0, 1524 MatSetOption_MPIAIJ, 1525 MatZeroEntries_MPIAIJ, 1526 MatZeroRows_MPIAIJ, 1527 0, 1528 0, 1529 0, 1530 0, 1531 MatGetSize_MPIAIJ, 1532 MatGetLocalSize_MPIAIJ, 1533 MatGetOwnershipRange_MPIAIJ, 1534 0, 1535 0, 1536 0, 1537 0, 1538 MatDuplicate_MPIAIJ, 1539 0, 1540 0, 1541 0, 1542 0, 1543 0, 1544 MatGetSubMatrices_MPIAIJ, 1545 MatIncreaseOverlap_MPIAIJ, 1546 MatGetValues_MPIAIJ, 1547 MatCopy_MPIAIJ, 1548 MatPrintHelp_MPIAIJ, 1549 MatScale_MPIAIJ, 1550 0, 1551 0, 1552 0, 1553 MatGetBlockSize_MPIAIJ, 1554 0, 1555 0, 1556 0, 1557 0, 1558 MatFDColoringCreate_MPIAIJ, 1559 0, 1560 MatSetUnfactored_MPIAIJ, 1561 0, 1562 0, 1563 MatGetSubMatrix_MPIAIJ, 1564 0, 1565 0, 1566 MatGetMaps_Petsc}; 1567 1568 /* ----------------------------------------------------------------------------------------*/ 1569 1570 EXTERN_C_BEGIN 1571 #undef __FUNC__ 1572 #define __FUNC__ "MatStoreValues_MPIAIJ" 1573 int MatStoreValues_MPIAIJ(Mat mat) 1574 { 1575 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1576 int ierr; 1577 1578 PetscFunctionBegin; 1579 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1580 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1581 PetscFunctionReturn(0); 1582 } 1583 EXTERN_C_END 1584 1585 EXTERN_C_BEGIN 1586 #undef __FUNC__ 1587 #define __FUNC__ "MatRetrieveValues_MPIAIJ" 1588 int MatRetrieveValues_MPIAIJ(Mat mat) 1589 { 1590 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1591 int ierr; 1592 1593 PetscFunctionBegin; 1594 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1595 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1596 PetscFunctionReturn(0); 1597 } 1598 EXTERN_C_END 1599 1600 #include "pc.h" 1601 EXTERN_C_BEGIN 1602 extern int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *); 1603 EXTERN_C_END 1604 1605 #undef __FUNC__ 1606 #define __FUNC__ "MatCreateMPIAIJ" 1607 /*@C 1608 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1609 (the default parallel PETSc format). For good matrix assembly performance 1610 the user should preallocate the matrix storage by setting the parameters 1611 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1612 performance can be increased by more than a factor of 50. 1613 1614 Collective on MPI_Comm 1615 1616 Input Parameters: 1617 + comm - MPI communicator 1618 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1619 This value should be the same as the local size used in creating the 1620 y vector for the matrix-vector product y = Ax. 1621 . n - This value should be the same as the local size used in creating the 1622 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 1623 calculated if N is given) For square matrices n is almost always m. 1624 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1625 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1626 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 1627 (same value is used for all local rows) 1628 . d_nnz - array containing the number of nonzeros in the various rows of the 1629 DIAGONAL portion of the local submatrix (possibly different for each row) 1630 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 1631 The size of this array is equal to the number of local rows, i.e 'm'. 1632 You must leave room for the diagonal entry even if it is zero. 1633 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 1634 submatrix (same value is used for all local rows). 1635 - o_nnz - array containing the number of nonzeros in the various rows of the 1636 OFF-DIAGONAL portion of the local submatrix (possibly different for 1637 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 1638 structure. The size of this array is equal to the number 1639 of local rows, i.e 'm'. 1640 1641 Output Parameter: 1642 . A - the matrix 1643 1644 Notes: 1645 m,n,M,N parameters specify the size of the matrix, and its partitioning across 1646 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 1647 storage requirements for this matrix. 1648 1649 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 1650 processor than it must be used on all processors that share the object for 1651 that argument. 1652 1653 The AIJ format (also called the Yale sparse matrix format or 1654 compressed row storage), is fully compatible with standard Fortran 77 1655 storage. That is, the stored row and column indices can begin at 1656 either one (as in Fortran) or zero. See the users manual for details. 1657 1658 The user MUST specify either the local or global matrix dimensions 1659 (possibly both). 1660 1661 The parallel matrix is partitioned such that the first m0 rows belong to 1662 process 0, the next m1 rows belong to process 1, the next m2 rows belong 1663 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 1664 1665 The DIAGONAL portion of the local submatrix of a processor can be defined 1666 as the submatrix which is obtained by extraction the part corresponding 1667 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 1668 first row that belongs to the processor, and r2 is the last row belonging 1669 to the this processor. This is a square mxm matrix. The remaining portion 1670 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 1671 1672 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 1673 1674 By default, this format uses inodes (identical nodes) when possible. 1675 We search for consecutive rows with the same nonzero structure, thereby 1676 reusing matrix information to achieve increased efficiency. 1677 1678 Options Database Keys: 1679 + -mat_aij_no_inode - Do not use inodes 1680 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 1681 - -mat_aij_oneindex - Internally use indexing starting at 1 1682 rather than 0. Note that when calling MatSetValues(), 1683 the user still MUST index entries starting at 0! 1684 . -mat_mpi - use the parallel matrix data structures even on one processor 1685 (defaults to using SeqBAIJ format on one processor) 1686 . -mat_mpi - use the parallel matrix data structures even on one processor 1687 (defaults to using SeqAIJ format on one processor) 1688 1689 1690 Example usage: 1691 1692 Consider the following 8x8 matrix with 34 non-zero values, that is 1693 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 1694 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 1695 as follows: 1696 1697 .vb 1698 1 2 0 | 0 3 0 | 0 4 1699 Proc0 0 5 6 | 7 0 0 | 8 0 1700 9 0 10 | 11 0 0 | 12 0 1701 ------------------------------------- 1702 13 0 14 | 15 16 17 | 0 0 1703 Proc1 0 18 0 | 19 20 21 | 0 0 1704 0 0 0 | 22 23 0 | 24 0 1705 ------------------------------------- 1706 Proc2 25 26 27 | 0 0 28 | 29 0 1707 30 0 0 | 31 32 33 | 0 34 1708 .ve 1709 1710 This can be represented as a collection of submatrices as: 1711 1712 .vb 1713 A B C 1714 D E F 1715 G H I 1716 .ve 1717 1718 Where the submatrices A,B,C are owned by proc0, D,E,F are 1719 owned by proc1, G,H,I are owned by proc2. 1720 1721 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 1722 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 1723 The 'M','N' parameters are 8,8, and have the same values on all procs. 1724 1725 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 1726 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 1727 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 1728 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 1729 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 1730 matrix, ans [DF] as another SeqAIJ matrix. 1731 1732 When d_nz, o_nz parameters are specified, d_nz storage elements are 1733 allocated for every row of the local diagonal submatrix, and o_nz 1734 storage locations are allocated for every row of the OFF-DIAGONAL submat. 1735 One way to choose d_nz and o_nz is to use the max nonzerors per local 1736 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 1737 In this case, the values of d_nz,o_nz are: 1738 .vb 1739 proc0 : dnz = 2, o_nz = 2 1740 proc1 : dnz = 3, o_nz = 2 1741 proc2 : dnz = 1, o_nz = 4 1742 .ve 1743 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 1744 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 1745 for proc3. i.e we are using 12+15+10=37 storage locations to store 1746 34 values. 1747 1748 When d_nnz, o_nnz parameters are specified, the storage is specified 1749 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 1750 In the above case the values for d_nnz,o_nnz are: 1751 .vb 1752 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 1753 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 1754 proc2: d_nnz = [1,1] and o_nnz = [4,4] 1755 .ve 1756 Here the space allocated is sum of all the above values i.e 34, and 1757 hence pre-allocation is perfect. 1758 1759 Level: intermediate 1760 1761 .keywords: matrix, aij, compressed row, sparse, parallel 1762 1763 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1764 @*/ 1765 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1766 { 1767 Mat B; 1768 Mat_MPIAIJ *b; 1769 int ierr,i,size; 1770 PetscTruth flag1,flag2; 1771 1772 PetscFunctionBegin; 1773 1774 if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -2: value %d",d_nz); 1775 if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -2: value %d",o_nz); 1776 if (d_nnz) { 1777 for (i=0; i<m; i++) { 1778 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 1779 } 1780 } 1781 if (o_nnz) { 1782 for (i=0; i<m; i++) { 1783 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 1784 } 1785 } 1786 1787 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1788 ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1);CHKERRQ(ierr); 1789 ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr); 1790 if (!flag1 && !flag2 && size == 1) { 1791 if (M == PETSC_DECIDE) M = m; 1792 if (N == PETSC_DECIDE) N = n; 1793 ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A);CHKERRQ(ierr); 1794 PetscFunctionReturn(0); 1795 } 1796 1797 *A = 0; 1798 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView); 1799 PLogObjectCreate(B); 1800 B->data = (void*)(b = PetscNew(Mat_MPIAIJ));CHKPTRQ(b); 1801 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1802 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1803 B->ops->destroy = MatDestroy_MPIAIJ; 1804 B->ops->view = MatView_MPIAIJ; 1805 B->factor = 0; 1806 B->assembled = PETSC_FALSE; 1807 B->mapping = 0; 1808 1809 B->insertmode = NOT_SET_VALUES; 1810 b->size = size; 1811 ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr); 1812 1813 if (m == PETSC_DECIDE && (d_nnz || o_nnz)) { 1814 SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1815 } 1816 1817 ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr); 1818 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 1819 b->m = m; B->m = m; 1820 b->n = n; B->n = n; 1821 b->N = N; B->N = N; 1822 b->M = M; B->M = M; 1823 1824 /* the information in the maps duplicates the information computed below, eventually 1825 we should remove the duplicate information that is not contained in the maps */ 1826 ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr); 1827 ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr); 1828 1829 /* build local table of row and column ownerships */ 1830 b->rowners = (int*)PetscMalloc(2*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners); 1831 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1832 b->cowners = b->rowners + b->size + 2; 1833 ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1834 b->rowners[0] = 0; 1835 for (i=2; i<=b->size; i++) { 1836 b->rowners[i] += b->rowners[i-1]; 1837 } 1838 b->rstart = b->rowners[b->rank]; 1839 b->rend = b->rowners[b->rank+1]; 1840 ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1841 b->cowners[0] = 0; 1842 for (i=2; i<=b->size; i++) { 1843 b->cowners[i] += b->cowners[i-1]; 1844 } 1845 b->cstart = b->cowners[b->rank]; 1846 b->cend = b->cowners[b->rank+1]; 1847 1848 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1849 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 1850 PLogObjectParent(B,b->A); 1851 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1852 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 1853 PLogObjectParent(B,b->B); 1854 1855 /* build cache for off array entries formed */ 1856 ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr); 1857 b->donotstash = PETSC_FALSE; 1858 b->colmap = 0; 1859 b->garray = 0; 1860 b->roworiented = PETSC_TRUE; 1861 1862 /* stuff used for matrix vector multiply */ 1863 b->lvec = PETSC_NULL; 1864 b->Mvctx = PETSC_NULL; 1865 1866 /* stuff for MatGetRow() */ 1867 b->rowindices = 0; 1868 b->rowvalues = 0; 1869 b->getrowactive = PETSC_FALSE; 1870 1871 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1872 "MatStoreValues_MPIAIJ", 1873 (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1874 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1875 "MatRetrieveValues_MPIAIJ", 1876 (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1877 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 1878 "MatGetDiagonalBlock_MPIAIJ", 1879 (void*)MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1880 *A = B; 1881 PetscFunctionReturn(0); 1882 } 1883 1884 #undef __FUNC__ 1885 #define __FUNC__ "MatDuplicate_MPIAIJ" 1886 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1887 { 1888 Mat mat; 1889 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 1890 int ierr,len = 0; 1891 PetscTruth flg; 1892 1893 PetscFunctionBegin; 1894 *newmat = 0; 1895 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView); 1896 PLogObjectCreate(mat); 1897 mat->data = (void*)(a = PetscNew(Mat_MPIAIJ));CHKPTRQ(a); 1898 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1899 mat->ops->destroy = MatDestroy_MPIAIJ; 1900 mat->ops->view = MatView_MPIAIJ; 1901 mat->factor = matin->factor; 1902 mat->assembled = PETSC_TRUE; 1903 mat->insertmode = NOT_SET_VALUES; 1904 1905 a->m = mat->m = oldmat->m; 1906 a->n = mat->n = oldmat->n; 1907 a->M = mat->M = oldmat->M; 1908 a->N = mat->N = oldmat->N; 1909 1910 a->rstart = oldmat->rstart; 1911 a->rend = oldmat->rend; 1912 a->cstart = oldmat->cstart; 1913 a->cend = oldmat->cend; 1914 a->size = oldmat->size; 1915 a->rank = oldmat->rank; 1916 a->donotstash = oldmat->donotstash; 1917 a->roworiented = oldmat->roworiented; 1918 a->rowindices = 0; 1919 a->rowvalues = 0; 1920 a->getrowactive = PETSC_FALSE; 1921 1922 a->rowners = (int*)PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners); 1923 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1924 a->cowners = a->rowners + a->size + 2; 1925 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1926 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1927 if (oldmat->colmap) { 1928 #if defined (PETSC_USE_CTABLE) 1929 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1930 #else 1931 a->colmap = (int*)PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1932 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1933 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));CHKERRQ(ierr); 1934 #endif 1935 } else a->colmap = 0; 1936 if (oldmat->garray) { 1937 len = ((Mat_SeqAIJ*)(oldmat->B->data))->n; 1938 a->garray = (int*)PetscMalloc((len+1)*sizeof(int));CHKPTRQ(a->garray); 1939 PLogObjectMemory(mat,len*sizeof(int)); 1940 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1941 } else a->garray = 0; 1942 1943 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1944 PLogObjectParent(mat,a->lvec); 1945 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1946 PLogObjectParent(mat,a->Mvctx); 1947 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1948 PLogObjectParent(mat,a->A); 1949 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1950 PLogObjectParent(mat,a->B); 1951 ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 1952 if (flg) { 1953 ierr = MatPrintHelp(mat);CHKERRQ(ierr); 1954 } 1955 ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1956 *newmat = mat; 1957 PetscFunctionReturn(0); 1958 } 1959 1960 #include "sys.h" 1961 1962 #undef __FUNC__ 1963 #define __FUNC__ "MatLoad_MPIAIJ" 1964 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 1965 { 1966 Mat A; 1967 Scalar *vals,*svals; 1968 MPI_Comm comm = ((PetscObject)viewer)->comm; 1969 MPI_Status status; 1970 int i,nz,ierr,j,rstart,rend,fd; 1971 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1972 int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols; 1973 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1974 1975 PetscFunctionBegin; 1976 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1977 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1978 if (!rank) { 1979 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1980 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1981 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 1982 if (header[3] < 0) { 1983 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ"); 1984 } 1985 } 1986 1987 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1988 M = header[1]; N = header[2]; 1989 /* determine ownership of all rows */ 1990 m = M/size + ((M % size) > rank); 1991 rowners = (int*)PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners); 1992 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1993 rowners[0] = 0; 1994 for (i=2; i<=size; i++) { 1995 rowners[i] += rowners[i-1]; 1996 } 1997 rstart = rowners[rank]; 1998 rend = rowners[rank+1]; 1999 2000 /* distribute row lengths to all processors */ 2001 ourlens = (int*)PetscMalloc(2*(rend-rstart)*sizeof(int));CHKPTRQ(ourlens); 2002 offlens = ourlens + (rend-rstart); 2003 if (!rank) { 2004 rowlengths = (int*)PetscMalloc(M*sizeof(int));CHKPTRQ(rowlengths); 2005 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2006 sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts); 2007 for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i]; 2008 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 2009 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2010 } else { 2011 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 2012 } 2013 2014 if (!rank) { 2015 /* calculate the number of nonzeros on each processor */ 2016 procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz); 2017 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2018 for (i=0; i<size; i++) { 2019 for (j=rowners[i]; j< rowners[i+1]; j++) { 2020 procsnz[i] += rowlengths[j]; 2021 } 2022 } 2023 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2024 2025 /* determine max buffer needed and allocate it */ 2026 maxnz = 0; 2027 for (i=0; i<size; i++) { 2028 maxnz = PetscMax(maxnz,procsnz[i]); 2029 } 2030 cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols); 2031 2032 /* read in my part of the matrix column indices */ 2033 nz = procsnz[0]; 2034 mycols = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(mycols); 2035 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2036 2037 /* read in every one elses and ship off */ 2038 for (i=1; i<size; i++) { 2039 nz = procsnz[i]; 2040 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2041 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2042 } 2043 ierr = PetscFree(cols);CHKERRQ(ierr); 2044 } else { 2045 /* determine buffer space needed for message */ 2046 nz = 0; 2047 for (i=0; i<m; i++) { 2048 nz += ourlens[i]; 2049 } 2050 mycols = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(mycols); 2051 2052 /* receive message of column indices*/ 2053 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2054 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2055 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2056 } 2057 2058 /* determine column ownership if matrix is not square */ 2059 if (N != M) { 2060 n = N/size + ((N % size) > rank); 2061 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2062 cstart = cend - n; 2063 } else { 2064 cstart = rstart; 2065 cend = rend; 2066 n = cend - cstart; 2067 } 2068 2069 /* loop over local rows, determining number of off diagonal entries */ 2070 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 2071 jj = 0; 2072 for (i=0; i<m; i++) { 2073 for (j=0; j<ourlens[i]; j++) { 2074 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2075 jj++; 2076 } 2077 } 2078 2079 /* create our matrix */ 2080 for (i=0; i<m; i++) { 2081 ourlens[i] -= offlens[i]; 2082 } 2083 ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 2084 A = *newmat; 2085 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2086 for (i=0; i<m; i++) { 2087 ourlens[i] += offlens[i]; 2088 } 2089 2090 if (!rank) { 2091 vals = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(vals); 2092 2093 /* read in my part of the matrix numerical values */ 2094 nz = procsnz[0]; 2095 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2096 2097 /* insert into matrix */ 2098 jj = rstart; 2099 smycols = mycols; 2100 svals = vals; 2101 for (i=0; i<m; i++) { 2102 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2103 smycols += ourlens[i]; 2104 svals += ourlens[i]; 2105 jj++; 2106 } 2107 2108 /* read in other processors and ship out */ 2109 for (i=1; i<size; i++) { 2110 nz = procsnz[i]; 2111 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2112 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2113 } 2114 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2115 } else { 2116 /* receive numeric values */ 2117 vals = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(vals); 2118 2119 /* receive message of values*/ 2120 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2121 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2122 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2123 2124 /* insert into matrix */ 2125 jj = rstart; 2126 smycols = mycols; 2127 svals = vals; 2128 for (i=0; i<m; i++) { 2129 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2130 smycols += ourlens[i]; 2131 svals += ourlens[i]; 2132 jj++; 2133 } 2134 } 2135 ierr = PetscFree(ourlens);CHKERRQ(ierr); 2136 ierr = PetscFree(vals);CHKERRQ(ierr); 2137 ierr = PetscFree(mycols);CHKERRQ(ierr); 2138 ierr = PetscFree(rowners);CHKERRQ(ierr); 2139 2140 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2141 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2142 PetscFunctionReturn(0); 2143 } 2144 2145 #undef __FUNC__ 2146 #define __FUNC__ "MatGetSubMatrix_MPIAIJ" 2147 /* 2148 Not great since it makes two copies of the submatrix, first an SeqAIJ 2149 in local and then by concatenating the local matrices the end result. 2150 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2151 */ 2152 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 2153 { 2154 int ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j; 2155 Mat *local,M,Mreuse; 2156 Scalar *vwork,*aa; 2157 MPI_Comm comm = mat->comm; 2158 Mat_SeqAIJ *aij; 2159 int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend; 2160 2161 PetscFunctionBegin; 2162 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2163 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2164 2165 if (call == MAT_REUSE_MATRIX) { 2166 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2167 if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before,cannot reuse"); 2168 local = &Mreuse; 2169 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2170 } else { 2171 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2172 Mreuse = *local; 2173 ierr = PetscFree(local);CHKERRQ(ierr); 2174 } 2175 2176 /* 2177 m - number of local rows 2178 n - number of columns (same on all processors) 2179 rstart - first row in new global matrix generated 2180 */ 2181 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2182 if (call == MAT_INITIAL_MATRIX) { 2183 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2184 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 2185 ii = aij->i; 2186 jj = aij->j; 2187 2188 /* 2189 Determine the number of non-zeros in the diagonal and off-diagonal 2190 portions of the matrix in order to do correct preallocation 2191 */ 2192 2193 /* first get start and end of "diagonal" columns */ 2194 if (csize == PETSC_DECIDE) { 2195 nlocal = n/size + ((n % size) > rank); 2196 } else { 2197 nlocal = csize; 2198 } 2199 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2200 rstart = rend - nlocal; 2201 if (rank == size - 1 && rend != n) { 2202 SETERRQ(1,1,"Local column sizes do not add up to total number of columns"); 2203 } 2204 2205 /* next, compute all the lengths */ 2206 dlens = (int*)PetscMalloc((2*m+1)*sizeof(int));CHKPTRQ(dlens); 2207 olens = dlens + m; 2208 for (i=0; i<m; i++) { 2209 jend = ii[i+1] - ii[i]; 2210 olen = 0; 2211 dlen = 0; 2212 for (j=0; j<jend; j++) { 2213 if (*jj < rstart || *jj >= rend) olen++; 2214 else dlen++; 2215 jj++; 2216 } 2217 olens[i] = olen; 2218 dlens[i] = dlen; 2219 } 2220 ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 2221 ierr = PetscFree(dlens);CHKERRQ(ierr); 2222 } else { 2223 int ml,nl; 2224 2225 M = *newmat; 2226 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2227 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request"); 2228 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2229 /* 2230 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2231 rather than the slower MatSetValues(). 2232 */ 2233 M->was_assembled = PETSC_TRUE; 2234 M->assembled = PETSC_FALSE; 2235 } 2236 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2237 aij = (Mat_SeqAIJ*)(Mreuse)->data; 2238 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 2239 ii = aij->i; 2240 jj = aij->j; 2241 aa = aij->a; 2242 for (i=0; i<m; i++) { 2243 row = rstart + i; 2244 nz = ii[i+1] - ii[i]; 2245 cwork = jj; jj += nz; 2246 vwork = aa; aa += nz; 2247 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2248 } 2249 2250 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2251 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2252 *newmat = M; 2253 2254 /* save submatrix used in processor for next request */ 2255 if (call == MAT_INITIAL_MATRIX) { 2256 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2257 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2258 } 2259 2260 PetscFunctionReturn(0); 2261 } 2262