1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: mpiaij.c,v 1.299 1999/09/14 17:38:39 bsmith Exp bsmith $"; 3 #endif 4 5 #include "src/mat/impls/aij/mpi/mpiaij.h" 6 #include "src/vec/vecimpl.h" 7 #include "src/inline/spops.h" 8 9 extern int MatSetUpMultiply_MPIAIJ(Mat); 10 extern int DisAssemble_MPIAIJ(Mat); 11 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 12 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 13 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 14 extern int MatPrintHelp_SeqAIJ(Mat); 15 16 /* local utility routine that creates a mapping from the global column 17 number to the local number in the off-diagonal part of the local 18 storage of the matrix. This is done in a non scable way since the 19 length of colmap equals the global matrix length. 20 */ 21 #undef __FUNC__ 22 #define __FUNC__ "CreateColmap_MPIAIJ_Private" 23 int CreateColmap_MPIAIJ_Private(Mat mat) 24 { 25 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 26 Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 27 int n = B->n,i,ierr; 28 29 PetscFunctionBegin; 30 #if defined (PETSC_USE_CTABLE) 31 ierr = TableCreate(aij->n/5,&aij->colmap);CHKERRQ(ierr); 32 for ( i=0; i<n; i++ ){ 33 ierr = TableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 34 } 35 #else 36 aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap); 37 PLogObjectMemory(mat,aij->N*sizeof(int)); 38 ierr = PetscMemzero(aij->colmap,aij->N*sizeof(int));CHKERRQ(ierr); 39 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 40 #endif 41 PetscFunctionReturn(0); 42 } 43 44 #define CHUNKSIZE 15 45 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 46 { \ 47 \ 48 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 49 rmax = aimax[row]; nrow = ailen[row]; \ 50 col1 = col - shift; \ 51 \ 52 low = 0; high = nrow; \ 53 while (high-low > 5) { \ 54 t = (low+high)/2; \ 55 if (rp[t] > col) high = t; \ 56 else low = t; \ 57 } \ 58 for ( _i=0; _i<nrow; _i++ ) { \ 59 if (rp[_i] > col1) break; \ 60 if (rp[_i] == col1) { \ 61 if (addv == ADD_VALUES) ap[_i] += value; \ 62 else ap[_i] = value; \ 63 goto a_noinsert; \ 64 } \ 65 } \ 66 if (nonew == 1) goto a_noinsert; \ 67 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 68 if (nrow >= rmax) { \ 69 /* there is no extra room in row, therefore enlarge */ \ 70 int new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \ 71 Scalar *new_a; \ 72 \ 73 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 74 \ 75 /* malloc new storage space */ \ 76 len = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \ 77 new_a = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \ 78 new_j = (int *) (new_a + new_nz); \ 79 new_i = new_j + new_nz; \ 80 \ 81 /* copy over old data into new slots */ \ 82 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \ 83 for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 84 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 85 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 86 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 87 len*sizeof(int));CHKERRQ(ierr); \ 88 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \ 89 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 90 len*sizeof(Scalar));CHKERRQ(ierr); \ 91 /* free up old matrix storage */ \ 92 \ 93 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 94 if 
(!a->singlemalloc) { \ 95 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 96 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 97 } \ 98 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 99 a->singlemalloc = 1; \ 100 \ 101 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 102 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 103 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 104 a->maxnz += CHUNKSIZE; \ 105 a->reallocs++; \ 106 } \ 107 N = nrow++ - 1; a->nz++; \ 108 /* shift up all the later entries in this row */ \ 109 for ( ii=N; ii>=_i; ii-- ) { \ 110 rp[ii+1] = rp[ii]; \ 111 ap[ii+1] = ap[ii]; \ 112 } \ 113 rp[_i] = col1; \ 114 ap[_i] = value; \ 115 a_noinsert: ; \ 116 ailen[row] = nrow; \ 117 } 118 119 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 120 { \ 121 \ 122 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 123 rmax = bimax[row]; nrow = bilen[row]; \ 124 col1 = col - shift; \ 125 \ 126 low = 0; high = nrow; \ 127 while (high-low > 5) { \ 128 t = (low+high)/2; \ 129 if (rp[t] > col) high = t; \ 130 else low = t; \ 131 } \ 132 for ( _i=0; _i<nrow; _i++ ) { \ 133 if (rp[_i] > col1) break; \ 134 if (rp[_i] == col1) { \ 135 if (addv == ADD_VALUES) ap[_i] += value; \ 136 else ap[_i] = value; \ 137 goto b_noinsert; \ 138 } \ 139 } \ 140 if (nonew == 1) goto b_noinsert; \ 141 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 142 if (nrow >= rmax) { \ 143 /* there is no extra room in row, therefore enlarge */ \ 144 int new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \ 145 Scalar *new_a; \ 146 \ 147 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 148 \ 149 /* malloc new storage space */ \ 150 len = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \ 151 new_a = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \ 152 new_j = (int *) (new_a + new_nz); \ 153 new_i = new_j + new_nz; \ 154 \ 155 /* copy over old data into new slots */ \ 156 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \ 157 for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 158 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 159 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 160 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 161 len*sizeof(int));CHKERRQ(ierr); \ 162 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \ 163 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 164 len*sizeof(Scalar));CHKERRQ(ierr); \ 165 /* free up old matrix storage */ \ 166 \ 167 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 168 if (!b->singlemalloc) { \ 169 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 170 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 171 } \ 172 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 173 b->singlemalloc = 1; \ 174 \ 175 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 176 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 177 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 178 b->maxnz += CHUNKSIZE; \ 179 b->reallocs++; \ 180 } \ 181 N = nrow++ - 1; b->nz++; \ 182 /* shift up all the later entries in this row */ \ 183 for ( ii=N; ii>=_i; ii-- ) { \ 184 rp[ii+1] = rp[ii]; \ 185 ap[ii+1] = ap[ii]; \ 186 } \ 187 rp[_i] = col1; \ 188 ap[_i] = value; \ 189 b_noinsert: ; \ 190 bilen[row] = nrow; \ 191 } 192 193 #undef __FUNC__ 194 #define __FUNC__ "MatSetValues_MPIAIJ" 195 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int 
*in,Scalar *v,InsertMode addv) 196 { 197 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 198 Scalar value; 199 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 200 int cstart = aij->cstart, cend = aij->cend,row,col; 201 int roworiented = aij->roworiented; 202 203 /* Some Variables required in the macro */ 204 Mat A = aij->A; 205 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 206 int *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j; 207 Scalar *aa = a->a; 208 209 Mat B = aij->B; 210 Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data; 211 int *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j; 212 Scalar *ba = b->a; 213 214 int *rp,ii,nrow,_i,rmax, N, col1,low,high,t; 215 int nonew = a->nonew,shift = a->indexshift; 216 Scalar *ap; 217 218 PetscFunctionBegin; 219 for ( i=0; i<m; i++ ) { 220 if (im[i] < 0) continue; 221 #if defined(PETSC_USE_BOPT_g) 222 if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 223 #endif 224 if (im[i] >= rstart && im[i] < rend) { 225 row = im[i] - rstart; 226 for ( j=0; j<n; j++ ) { 227 if (in[j] >= cstart && in[j] < cend){ 228 col = in[j] - cstart; 229 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 230 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 231 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 232 } 233 else if (in[j] < 0) continue; 234 #if defined(PETSC_USE_BOPT_g) 235 else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");} 236 #endif 237 else { 238 if (mat->was_assembled) { 239 if (!aij->colmap) { 240 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 241 } 242 #if defined (PETSC_USE_CTABLE) 243 ierr = TableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 244 col--; 245 #else 246 col = aij->colmap[in[j]] - 1; 247 #endif 248 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 249 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 250 col = in[j]; 251 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 252 B = aij->B; 253 b = (Mat_SeqAIJ *) B->data; 254 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 255 ba = b->a; 256 } 257 } else col = in[j]; 258 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 259 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 260 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 261 } 262 } 263 } else { 264 if (!aij->donotstash) { 265 if (roworiented) { 266 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 267 } else { 268 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 269 } 270 } 271 } 272 } 273 PetscFunctionReturn(0); 274 } 275 276 #undef __FUNC__ 277 #define __FUNC__ "MatGetValues_MPIAIJ" 278 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 279 { 280 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 281 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 282 int cstart = aij->cstart, cend = aij->cend,row,col; 283 284 PetscFunctionBegin; 285 for ( i=0; i<m; i++ ) { 286 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 287 if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 288 if (idxm[i] >= rstart && idxm[i] < rend) { 289 row = idxm[i] - rstart; 290 for ( j=0; j<n; j++ ) { 291 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 292 if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 293 if (idxn[j] >= cstart && idxn[j] < cend){ 294 col = idxn[j] - cstart; 295 ierr = 
MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 296 } else { 297 if (!aij->colmap) { 298 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 299 } 300 #if defined (PETSC_USE_CTABLE) 301 ierr = TableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 302 col --; 303 #else 304 col = aij->colmap[idxn[j]] - 1; 305 #endif 306 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 307 else { 308 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 309 } 310 } 311 } 312 } else { 313 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 314 } 315 } 316 PetscFunctionReturn(0); 317 } 318 319 #undef __FUNC__ 320 #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 321 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 322 { 323 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 324 int ierr,nstash,reallocs; 325 InsertMode addv; 326 327 PetscFunctionBegin; 328 if (aij->donotstash) { 329 PetscFunctionReturn(0); 330 } 331 332 /* make sure all processors are either in INSERTMODE or ADDMODE */ 333 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 334 if (addv == (ADD_VALUES|INSERT_VALUES)) { 335 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 336 } 337 mat->insertmode = addv; /* in case this processor had no cache */ 338 339 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 340 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 341 PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 342 PetscFunctionReturn(0); 343 } 344 345 346 #undef __FUNC__ 347 #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 348 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 349 { 350 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 351 int i,j,rstart,ncols,n,ierr,flg; 352 int *row,*col,other_disassembled; 353 Scalar *val; 354 InsertMode addv = mat->insertmode; 355 356 PetscFunctionBegin; 357 if (!aij->donotstash) { 358 while (1) { 359 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 360 if (!flg) break; 361 362 for ( i=0; i<n; ) { 363 /* Now identify the consecutive vals belonging to the same row */ 364 for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; } 365 if (j < n) ncols = j-i; 366 else ncols = n-i; 367 /* Now assemble all these values with a single function call */ 368 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 369 i = j; 370 } 371 } 372 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 373 } 374 375 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 376 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 377 378 /* determine if any processor has disassembled, if so we must 379 also disassemble ourselfs, in order that we may reassemble. 
*/ 380 /* 381 if nonzero structure of submatrix B cannot change then we know that 382 no processor disassembled thus we can skip this stuff 383 */ 384 if (!((Mat_SeqAIJ*) aij->B->data)->nonew) { 385 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 386 if (mat->was_assembled && !other_disassembled) { 387 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 388 } 389 } 390 391 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 392 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 393 } 394 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 395 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 396 397 if (aij->rowvalues) { 398 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 399 aij->rowvalues = 0; 400 } 401 PetscFunctionReturn(0); 402 } 403 404 #undef __FUNC__ 405 #define __FUNC__ "MatZeroEntries_MPIAIJ" 406 int MatZeroEntries_MPIAIJ(Mat A) 407 { 408 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 409 int ierr; 410 411 PetscFunctionBegin; 412 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 413 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 414 PetscFunctionReturn(0); 415 } 416 417 #undef __FUNC__ 418 #define __FUNC__ "MatZeroRows_MPIAIJ" 419 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 420 { 421 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 422 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 423 int *procs,*nprocs,j,found,idx,nsends,*work,row; 424 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 425 int *rvalues,tag = A->tag,count,base,slen,n,*source; 426 int *lens,imdex,*lrows,*values,rstart=l->rstart; 427 MPI_Comm comm = A->comm; 428 MPI_Request *send_waits,*recv_waits; 429 MPI_Status recv_status,*send_status; 430 IS istmp; 431 432 PetscFunctionBegin; 433 ierr = ISGetSize(is,&N);CHKERRQ(ierr); 434 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 435 436 /* first count number of contributors to each processor */ 437 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs); 438 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 439 procs = nprocs + size; 440 owner = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/ 441 for ( i=0; i<N; i++ ) { 442 idx = rows[i]; 443 found = 0; 444 for ( j=0; j<size; j++ ) { 445 if (idx >= owners[j] && idx < owners[j+1]) { 446 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 447 } 448 } 449 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 450 } 451 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 452 453 /* inform other processors of number of messages and max length*/ 454 work = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work); 455 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 456 nrecvs = work[rank]; 457 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 458 nmax = work[rank]; 459 ierr = PetscFree(work);CHKERRQ(ierr); 460 461 /* post receives: */ 462 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 463 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 464 for ( i=0; i<nrecvs; i++ ) { 465 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 466 } 467 468 /* do sends: 469 1) starts[i] gives the starting index in svalues for stuff going to 470 the ith processor 471 */ 472 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues); 473 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 474 starts = 
(int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts); 475 starts[0] = 0; 476 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 477 for ( i=0; i<N; i++ ) { 478 svalues[starts[owner[i]]++] = rows[i]; 479 } 480 ISRestoreIndices(is,&rows); 481 482 starts[0] = 0; 483 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 484 count = 0; 485 for ( i=0; i<size; i++ ) { 486 if (procs[i]) { 487 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 488 } 489 } 490 ierr = PetscFree(starts);CHKERRQ(ierr); 491 492 base = owners[rank]; 493 494 /* wait on receives */ 495 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens); 496 source = lens + nrecvs; 497 count = nrecvs; slen = 0; 498 while (count) { 499 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 500 /* unpack receives into our local space */ 501 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 502 source[imdex] = recv_status.MPI_SOURCE; 503 lens[imdex] = n; 504 slen += n; 505 count--; 506 } 507 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 508 509 /* move the data into the send scatter */ 510 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows); 511 count = 0; 512 for ( i=0; i<nrecvs; i++ ) { 513 values = rvalues + i*nmax; 514 for ( j=0; j<lens[i]; j++ ) { 515 lrows[count++] = values[j] - base; 516 } 517 } 518 ierr = PetscFree(rvalues);CHKERRQ(ierr); 519 ierr = PetscFree(lens);CHKERRQ(ierr); 520 ierr = PetscFree(owner);CHKERRQ(ierr); 521 ierr = PetscFree(nprocs);CHKERRQ(ierr); 522 523 /* actually zap the local rows */ 524 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 525 PLogObjectParent(A,istmp); 526 527 /* 528 Zero the required rows. If the "diagonal block" of the matrix 529 is square and the user wishes to set the diagonal we use seperate 530 code so that MatSetValues() is not called for each diagonal allocating 531 new memory, thus calling lots of mallocs and slowing things down. 
532 533 Contributed by: Mathew Knepley 534 */ 535 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 536 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 537 if (diag && (l->A->M == l->A->N)) { 538 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 539 } else if (diag) { 540 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 541 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 542 SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 543 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 544 } 545 for ( i = 0; i < slen; i++ ) { 546 row = lrows[i] + rstart; 547 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 548 } 549 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 550 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 551 } else { 552 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 553 } 554 ierr = ISDestroy(istmp);CHKERRQ(ierr); 555 ierr = PetscFree(lrows);CHKERRQ(ierr); 556 557 /* wait on sends */ 558 if (nsends) { 559 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 560 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 561 ierr = PetscFree(send_status);CHKERRQ(ierr); 562 } 563 ierr = PetscFree(send_waits);CHKERRQ(ierr); 564 ierr = PetscFree(svalues);CHKERRQ(ierr); 565 566 PetscFunctionReturn(0); 567 } 568 569 #undef __FUNC__ 570 #define __FUNC__ "MatMult_MPIAIJ" 571 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 572 { 573 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 574 int ierr,nt; 575 576 PetscFunctionBegin; 577 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 578 if (nt != a->n) { 579 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 580 } 581 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 582 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 583 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 584 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 585 PetscFunctionReturn(0); 586 } 587 588 #undef __FUNC__ 589 #define __FUNC__ "MatMultAdd_MPIAIJ" 590 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 591 { 592 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 593 int ierr; 594 595 PetscFunctionBegin; 596 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 597 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 598 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 599 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 600 PetscFunctionReturn(0); 601 } 602 603 #undef __FUNC__ 604 #define __FUNC__ "MatMultTrans_MPIAIJ" 605 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 606 { 607 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 608 int ierr; 609 610 PetscFunctionBegin; 611 /* do nondiagonal part */ 612 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr); 613 /* send it on its way */ 614 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 615 /* do local part */ 616 ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr); 617 /* receive remote parts: note this assumes the values are not actually */ 618 /* inserted in yy until the next line, which is true for my implementation*/ 619 /* but is not perhaps always true. 
*/ 620 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 621 PetscFunctionReturn(0); 622 } 623 624 #undef __FUNC__ 625 #define __FUNC__ "MatMultTransAdd_MPIAIJ" 626 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 627 { 628 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 629 int ierr; 630 631 PetscFunctionBegin; 632 /* do nondiagonal part */ 633 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr); 634 /* send it on its way */ 635 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 636 /* do local part */ 637 ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 638 /* receive remote parts: note this assumes the values are not actually */ 639 /* inserted in yy until the next line, which is true for my implementation*/ 640 /* but is not perhaps always true. */ 641 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 642 PetscFunctionReturn(0); 643 } 644 645 /* 646 This only works correctly for square matrices where the subblock A->A is the 647 diagonal block 648 */ 649 #undef __FUNC__ 650 #define __FUNC__ "MatGetDiagonal_MPIAIJ" 651 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 652 { 653 int ierr; 654 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 655 656 PetscFunctionBegin; 657 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 658 if (a->rstart != a->cstart || a->rend != a->cend) { 659 SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition"); 660 } 661 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 662 PetscFunctionReturn(0); 663 } 664 665 #undef __FUNC__ 666 #define __FUNC__ "MatScale_MPIAIJ" 667 int MatScale_MPIAIJ(Scalar *aa,Mat A) 668 { 669 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 670 int ierr; 671 672 PetscFunctionBegin; 673 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 674 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 675 PetscFunctionReturn(0); 676 } 677 678 #undef __FUNC__ 679 #define __FUNC__ "MatDestroy_MPIAIJ" 680 int MatDestroy_MPIAIJ(Mat mat) 681 { 682 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 683 int ierr; 684 685 PetscFunctionBegin; 686 687 if (mat->mapping) { 688 ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr); 689 } 690 if (mat->bmapping) { 691 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr); 692 } 693 if (mat->rmap) { 694 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 695 } 696 if (mat->cmap) { 697 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 698 } 699 #if defined(PETSC_USE_LOG) 700 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N); 701 #endif 702 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 703 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 704 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 705 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 706 #if defined (PETSC_USE_CTABLE) 707 if (aij->colmap) TableDelete(aij->colmap); 708 #else 709 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 710 #endif 711 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 712 if (aij->lvec) VecDestroy(aij->lvec); 713 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 714 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 715 ierr = PetscFree(aij);CHKERRQ(ierr); 716 PLogObjectDestroy(mat); 717 PetscHeaderDestroy(mat); 718 PetscFunctionReturn(0); 719 } 720 721 #undef __FUNC__ 722 #define __FUNC__ "MatView_MPIAIJ_Binary" 723 int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 724 { 725 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 726 int ierr; 727 728 
PetscFunctionBegin; 729 if (aij->size == 1) { 730 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 731 } 732 else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 733 PetscFunctionReturn(0); 734 } 735 736 #undef __FUNC__ 737 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworSocket" 738 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer) 739 { 740 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 741 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 742 int ierr, format,shift = C->indexshift,rank = aij->rank ; 743 int size = aij->size; 744 FILE *fd; 745 ViewerType vtype; 746 747 PetscFunctionBegin; 748 ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr); 749 if (PetscTypeCompare(vtype,ASCII_VIEWER)) { 750 ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 751 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 752 MatInfo info; 753 int flg; 754 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 755 ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr); 756 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 757 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 758 PetscSequentialPhaseBegin(mat->comm,1); 759 if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 760 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 761 else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 762 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 763 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 764 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 765 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 766 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 767 fflush(fd); 768 PetscSequentialPhaseEnd(mat->comm,1); 769 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 770 PetscFunctionReturn(0); 771 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 772 PetscFunctionReturn(0); 773 } 774 } else if (PetscTypeCompare(vtype,DRAW_VIEWER)) { 775 Draw draw; 776 PetscTruth isnull; 777 ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 778 ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 779 } 780 781 if (size == 1) { 782 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 783 } else { 784 /* assemble the entire matrix onto first processor. 
*/ 785 Mat A; 786 Mat_SeqAIJ *Aloc; 787 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 788 Scalar *a; 789 790 if (!rank) { 791 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 792 } else { 793 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 794 } 795 PLogObjectParent(mat,A); 796 797 /* copy over the A part */ 798 Aloc = (Mat_SeqAIJ*) aij->A->data; 799 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 800 row = aij->rstart; 801 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 802 for ( i=0; i<m; i++ ) { 803 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 804 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 805 } 806 aj = Aloc->j; 807 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 808 809 /* copy over the B part */ 810 Aloc = (Mat_SeqAIJ*) aij->B->data; 811 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 812 row = aij->rstart; 813 ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) );CHKPTRQ(cols); 814 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 815 for ( i=0; i<m; i++ ) { 816 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 817 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 818 } 819 ierr = PetscFree(ct);CHKERRQ(ierr); 820 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 821 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 822 /* 823 Everyone has to call to draw the matrix since the graphics waits are 824 synchronized across all processors that share the Draw object 825 */ 826 if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) { 827 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer);CHKERRQ(ierr); 828 } 829 ierr = MatDestroy(A);CHKERRQ(ierr); 830 } 831 PetscFunctionReturn(0); 832 } 833 834 #undef __FUNC__ 835 #define __FUNC__ "MatView_MPIAIJ" 836 int MatView_MPIAIJ(Mat mat,Viewer viewer) 837 { 838 int ierr; 839 ViewerType vtype; 840 841 PetscFunctionBegin; 842 ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr); 843 if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) || 844 PetscTypeCompare(vtype,SOCKET_VIEWER) || PetscTypeCompare(vtype,BINARY_VIEWER)) { 845 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 846 /* 847 } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) { 848 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 849 */ 850 } else { 851 SETERRQ(1,1,"Viewer type not supported by PETSc object"); 852 } 853 PetscFunctionReturn(0); 854 } 855 856 /* 857 This has to provide several versions. 858 859 2) a) use only local smoothing updating outer values only once. 860 b) local smoothing updating outer values each inner iteration 861 3) color updating out values betwen colors. 
862 */ 863 #undef __FUNC__ 864 #define __FUNC__ "MatRelax_MPIAIJ" 865 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 866 double fshift,int its,Vec xx) 867 { 868 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 869 Mat AA = mat->A, BB = mat->B; 870 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 871 Scalar *b,*x,*xs,*ls,d,*v,sum; 872 int ierr,*idx, *diag; 873 int n = mat->n, m = mat->m, i,shift = A->indexshift; 874 875 PetscFunctionBegin; 876 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 877 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 878 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 879 xs = x + shift; /* shift by one for index start of 1 */ 880 ls = ls + shift; 881 if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA);CHKERRQ(ierr);} 882 diag = A->diag; 883 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 884 if (flag & SOR_ZERO_INITIAL_GUESS) { 885 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 886 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 887 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 888 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 889 PetscFunctionReturn(0); 890 } 891 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 892 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 893 while (its--) { 894 /* go down through the rows */ 895 for ( i=0; i<m; i++ ) { 896 n = A->i[i+1] - A->i[i]; 897 PLogFlops(4*n+3); 898 idx = A->j + A->i[i] + shift; 899 v = A->a + A->i[i] + shift; 900 sum = b[i]; 901 SPARSEDENSEMDOT(sum,xs,v,idx,n); 902 d = fshift + A->a[diag[i]+shift]; 903 n = B->i[i+1] - B->i[i]; 904 idx = B->j + B->i[i] + shift; 905 v = B->a + B->i[i] + shift; 906 SPARSEDENSEMDOT(sum,ls,v,idx,n); 907 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 908 } 909 /* come up through the rows */ 910 for ( i=m-1; i>-1; i-- ) { 911 n = A->i[i+1] - A->i[i]; 912 PLogFlops(4*n+3) 913 idx = A->j + A->i[i] + shift; 914 v = A->a + A->i[i] + shift; 915 sum = b[i]; 916 SPARSEDENSEMDOT(sum,xs,v,idx,n); 917 d = fshift + A->a[diag[i]+shift]; 918 n = B->i[i+1] - B->i[i]; 919 idx = B->j + B->i[i] + shift; 920 v = B->a + B->i[i] + shift; 921 SPARSEDENSEMDOT(sum,ls,v,idx,n); 922 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 923 } 924 } 925 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 926 if (flag & SOR_ZERO_INITIAL_GUESS) { 927 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 928 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 929 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 930 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 931 PetscFunctionReturn(0); 932 } 933 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 934 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 935 while (its--) { 936 for ( i=0; i<m; i++ ) { 937 n = A->i[i+1] - A->i[i]; 938 PLogFlops(4*n+3); 939 idx = A->j + A->i[i] + shift; 940 v = A->a + A->i[i] + shift; 941 sum = b[i]; 942 SPARSEDENSEMDOT(sum,xs,v,idx,n); 943 d = fshift + A->a[diag[i]+shift]; 944 n = B->i[i+1] - B->i[i]; 945 idx = B->j + B->i[i] + shift; 946 v = B->a + B->i[i] + shift; 947 SPARSEDENSEMDOT(sum,ls,v,idx,n); 948 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 949 } 950 } 951 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 952 if (flag & SOR_ZERO_INITIAL_GUESS) { 953 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 954 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 955 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 956 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 957 PetscFunctionReturn(0); 958 } 959 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 960 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 961 while (its--) { 962 for ( i=m-1; i>-1; i-- ) { 963 n = A->i[i+1] - A->i[i]; 964 PLogFlops(4*n+3); 965 idx = A->j + A->i[i] + shift; 966 v = A->a + A->i[i] + shift; 967 sum = b[i]; 968 SPARSEDENSEMDOT(sum,xs,v,idx,n); 969 d = fshift + A->a[diag[i]+shift]; 970 n = B->i[i+1] - B->i[i]; 971 idx = B->j + B->i[i] + shift; 972 v = B->a + B->i[i] + shift; 973 SPARSEDENSEMDOT(sum,ls,v,idx,n); 974 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 975 } 976 } 977 } else { 978 SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported"); 979 } 980 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 981 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 982 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 983 PetscFunctionReturn(0); 984 } 985 986 #undef __FUNC__ 987 #define __FUNC__ "MatGetInfo_MPIAIJ" 988 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 989 { 990 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 991 Mat A = mat->A, B = mat->B; 992 int ierr; 993 double isend[5], irecv[5]; 994 995 PetscFunctionBegin; 996 info->block_size = 1.0; 997 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 998 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 999 isend[3] = info->memory; isend[4] = info->mallocs; 1000 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1001 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1002 isend[3] += info->memory; isend[4] += info->mallocs; 1003 if (flag == MAT_LOCAL) { 1004 info->nz_used = isend[0]; 1005 info->nz_allocated = isend[1]; 1006 info->nz_unneeded = isend[2]; 1007 info->memory = isend[3]; 1008 info->mallocs = isend[4]; 1009 } else if (flag == MAT_GLOBAL_MAX) { 1010 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1011 info->nz_used = irecv[0]; 1012 info->nz_allocated = irecv[1]; 1013 info->nz_unneeded = irecv[2]; 1014 info->memory = irecv[3]; 1015 info->mallocs = irecv[4]; 1016 } else if (flag == MAT_GLOBAL_SUM) { 1017 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1018 info->nz_used = irecv[0]; 1019 info->nz_allocated = irecv[1]; 1020 info->nz_unneeded = irecv[2]; 1021 info->memory = irecv[3]; 1022 info->mallocs = irecv[4]; 1023 } 1024 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1025 info->fill_ratio_needed = 0; 1026 info->factor_mallocs = 0; 1027 info->rows_global = (double)mat->M; 1028 info->columns_global = (double)mat->N; 1029 info->rows_local = (double)mat->m; 1030 info->columns_local = (double)mat->N; 1031 1032 PetscFunctionReturn(0); 1033 } 1034 1035 #undef __FUNC__ 1036 #define __FUNC__ "MatSetOption_MPIAIJ" 1037 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1038 { 1039 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1040 int ierr; 1041 1042 PetscFunctionBegin; 1043 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1044 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1045 op == 
MAT_COLUMNS_UNSORTED || 1046 op == MAT_COLUMNS_SORTED || 1047 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1048 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1049 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1050 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1051 } else if (op == MAT_ROW_ORIENTED) { 1052 a->roworiented = 1; 1053 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1054 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1055 } else if (op == MAT_ROWS_SORTED || 1056 op == MAT_ROWS_UNSORTED || 1057 op == MAT_SYMMETRIC || 1058 op == MAT_STRUCTURALLY_SYMMETRIC || 1059 op == MAT_YES_NEW_DIAGONALS) 1060 PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1061 else if (op == MAT_COLUMN_ORIENTED) { 1062 a->roworiented = 0; 1063 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1064 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1065 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1066 a->donotstash = 1; 1067 } else if (op == MAT_NO_NEW_DIAGONALS){ 1068 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1069 } else { 1070 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1071 } 1072 PetscFunctionReturn(0); 1073 } 1074 1075 #undef __FUNC__ 1076 #define __FUNC__ "MatGetSize_MPIAIJ" 1077 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1078 { 1079 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1080 1081 PetscFunctionBegin; 1082 if (m) *m = mat->M; 1083 if (n) *n = mat->N; 1084 PetscFunctionReturn(0); 1085 } 1086 1087 #undef __FUNC__ 1088 #define __FUNC__ "MatGetLocalSize_MPIAIJ" 1089 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1090 { 1091 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1092 1093 PetscFunctionBegin; 1094 if (m) *m = mat->m; 1095 if (n) *n = mat->n; 1096 PetscFunctionReturn(0); 1097 } 1098 1099 #undef __FUNC__ 1100 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" 1101 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1102 { 1103 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1104 1105 PetscFunctionBegin; 1106 *m = mat->rstart; *n = mat->rend; 1107 PetscFunctionReturn(0); 1108 } 1109 1110 #undef __FUNC__ 1111 #define __FUNC__ "MatGetRow_MPIAIJ" 1112 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1113 { 1114 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1115 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1116 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1117 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1118 int *cmap, *idx_p; 1119 1120 PetscFunctionBegin; 1121 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1122 mat->getrowactive = PETSC_TRUE; 1123 1124 if (!mat->rowvalues && (idx || v)) { 1125 /* 1126 allocate enough space to hold information from the longest row. 
1127 */ 1128 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1129 int max = 1,m = mat->m,tmp; 1130 for ( i=0; i<m; i++ ) { 1131 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1132 if (max < tmp) { max = tmp; } 1133 } 1134 mat->rowvalues = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 1135 mat->rowindices = (int *) (mat->rowvalues + max); 1136 } 1137 1138 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows") 1139 lrow = row - rstart; 1140 1141 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1142 if (!v) {pvA = 0; pvB = 0;} 1143 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1144 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1145 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1146 nztot = nzA + nzB; 1147 1148 cmap = mat->garray; 1149 if (v || idx) { 1150 if (nztot) { 1151 /* Sort by increasing column numbers, assuming A and B already sorted */ 1152 int imark = -1; 1153 if (v) { 1154 *v = v_p = mat->rowvalues; 1155 for ( i=0; i<nzB; i++ ) { 1156 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1157 else break; 1158 } 1159 imark = i; 1160 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1161 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1162 } 1163 if (idx) { 1164 *idx = idx_p = mat->rowindices; 1165 if (imark > -1) { 1166 for ( i=0; i<imark; i++ ) { 1167 idx_p[i] = cmap[cworkB[i]]; 1168 } 1169 } else { 1170 for ( i=0; i<nzB; i++ ) { 1171 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1172 else break; 1173 } 1174 imark = i; 1175 } 1176 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 1177 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 1178 } 1179 } else { 1180 if (idx) *idx = 0; 1181 if (v) *v = 0; 1182 } 1183 } 1184 *nz = nztot; 1185 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1186 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1187 PetscFunctionReturn(0); 1188 } 1189 1190 #undef __FUNC__ 1191 #define __FUNC__ "MatRestoreRow_MPIAIJ" 1192 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1193 { 1194 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1195 1196 PetscFunctionBegin; 1197 if (aij->getrowactive == PETSC_FALSE) { 1198 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1199 } 1200 aij->getrowactive = PETSC_FALSE; 1201 PetscFunctionReturn(0); 1202 } 1203 1204 #undef __FUNC__ 1205 #define __FUNC__ "MatNorm_MPIAIJ" 1206 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1207 { 1208 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1209 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1210 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1211 double sum = 0.0; 1212 Scalar *v; 1213 1214 PetscFunctionBegin; 1215 if (aij->size == 1) { 1216 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1217 } else { 1218 if (type == NORM_FROBENIUS) { 1219 v = amat->a; 1220 for (i=0; i<amat->nz; i++ ) { 1221 #if defined(PETSC_USE_COMPLEX) 1222 sum += PetscReal(PetscConj(*v)*(*v)); v++; 1223 #else 1224 sum += (*v)*(*v); v++; 1225 #endif 1226 } 1227 v = bmat->a; 1228 for (i=0; i<bmat->nz; i++ ) { 1229 #if defined(PETSC_USE_COMPLEX) 1230 sum += PetscReal(PetscConj(*v)*(*v)); v++; 1231 #else 1232 sum += (*v)*(*v); v++; 1233 #endif 1234 } 1235 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1236 *norm = sqrt(*norm); 1237 } else if (type == NORM_1) { /* max 
column norm */ 1238 double *tmp, *tmp2; 1239 int *jj, *garray = aij->garray; 1240 tmp = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp); 1241 tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp2); 1242 ierr = PetscMemzero(tmp,aij->N*sizeof(double));CHKERRQ(ierr); 1243 *norm = 0.0; 1244 v = amat->a; jj = amat->j; 1245 for ( j=0; j<amat->nz; j++ ) { 1246 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1247 } 1248 v = bmat->a; jj = bmat->j; 1249 for ( j=0; j<bmat->nz; j++ ) { 1250 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1251 } 1252 ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1253 for ( j=0; j<aij->N; j++ ) { 1254 if (tmp2[j] > *norm) *norm = tmp2[j]; 1255 } 1256 ierr = PetscFree(tmp);CHKERRQ(ierr); 1257 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1258 } else if (type == NORM_INFINITY) { /* max row norm */ 1259 double ntemp = 0.0; 1260 for ( j=0; j<amat->m; j++ ) { 1261 v = amat->a + amat->i[j] + shift; 1262 sum = 0.0; 1263 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1264 sum += PetscAbsScalar(*v); v++; 1265 } 1266 v = bmat->a + bmat->i[j] + shift; 1267 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1268 sum += PetscAbsScalar(*v); v++; 1269 } 1270 if (sum > ntemp) ntemp = sum; 1271 } 1272 ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr); 1273 } else { 1274 SETERRQ(PETSC_ERR_SUP,0,"No support for two norm"); 1275 } 1276 } 1277 PetscFunctionReturn(0); 1278 } 1279 1280 #undef __FUNC__ 1281 #define __FUNC__ "MatTranspose_MPIAIJ" 1282 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1283 { 1284 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1285 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1286 int ierr,shift = Aloc->indexshift; 1287 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1288 Mat B; 1289 Scalar *array; 1290 1291 PetscFunctionBegin; 1292 if (matout == PETSC_NULL && M != N) { 1293 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1294 } 1295 1296 ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1297 1298 /* copy over the A part */ 1299 Aloc = (Mat_SeqAIJ*) a->A->data; 1300 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1301 row = a->rstart; 1302 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1303 for ( i=0; i<m; i++ ) { 1304 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1305 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1306 } 1307 aj = Aloc->j; 1308 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1309 1310 /* copy over the B part */ 1311 Aloc = (Mat_SeqAIJ*) a->B->data; 1312 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1313 row = a->rstart; 1314 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) );CHKPTRQ(cols); 1315 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1316 for ( i=0; i<m; i++ ) { 1317 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1318 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1319 } 1320 ierr = PetscFree(ct);CHKERRQ(ierr); 1321 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1322 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1323 if (matout != PETSC_NULL) { 1324 *matout = B; 1325 } else { 1326 PetscOps *Abops; 1327 struct _MatOps *Aops; 1328 1329 /* This isn't really an in-place transpose .... 
but free data structures from a */ 1330 ierr = PetscFree(a->rowners);CHKERRQ(ierr); 1331 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1332 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1333 #if defined (PETSC_USE_CTABLE) 1334 if (a->colmap) TableDelete(a->colmap); 1335 #else 1336 if (a->colmap) {ierr = PetscFree(a->colmap);CHKERRQ(ierr);} 1337 #endif 1338 if (a->garray) {ierr = PetscFree(a->garray);CHKERRQ(ierr);} 1339 if (a->lvec) VecDestroy(a->lvec); 1340 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1341 ierr = PetscFree(a);CHKERRQ(ierr); 1342 1343 /* 1344 This is horrible, horrible code. We need to keep the 1345 A pointers for the bops and ops but copy everything 1346 else from C. 1347 */ 1348 Abops = A->bops; 1349 Aops = A->ops; 1350 ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr); 1351 A->bops = Abops; 1352 A->ops = Aops; 1353 PetscHeaderDestroy(B); 1354 } 1355 PetscFunctionReturn(0); 1356 } 1357 1358 #undef __FUNC__ 1359 #define __FUNC__ "MatDiagonalScale_MPIAIJ" 1360 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1361 { 1362 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1363 Mat a = aij->A, b = aij->B; 1364 int ierr,s1,s2,s3; 1365 1366 PetscFunctionBegin; 1367 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1368 if (rr) { 1369 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1370 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 1371 /* Overlap communication with computation. */ 1372 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1373 } 1374 if (ll) { 1375 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1376 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1377 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1378 } 1379 /* scale the diagonal block */ 1380 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1381 1382 if (rr) { 1383 /* Do a scatter end and then right scale the off-diagonal block */ 1384 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1385 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1386 } 1387 1388 PetscFunctionReturn(0); 1389 } 1390 1391 1392 #undef __FUNC__ 1393 #define __FUNC__ "MatPrintHelp_MPIAIJ" 1394 int MatPrintHelp_MPIAIJ(Mat A) 1395 { 1396 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1397 int ierr; 1398 1399 PetscFunctionBegin; 1400 if (!a->rank) { 1401 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1402 } 1403 PetscFunctionReturn(0); 1404 } 1405 1406 #undef __FUNC__ 1407 #define __FUNC__ "MatGetBlockSize_MPIAIJ" 1408 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1409 { 1410 PetscFunctionBegin; 1411 *bs = 1; 1412 PetscFunctionReturn(0); 1413 } 1414 #undef __FUNC__ 1415 #define __FUNC__ "MatSetUnfactored_MPIAIJ" 1416 int MatSetUnfactored_MPIAIJ(Mat A) 1417 { 1418 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1419 int ierr; 1420 1421 PetscFunctionBegin; 1422 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1423 PetscFunctionReturn(0); 1424 } 1425 1426 #undef __FUNC__ 1427 #define __FUNC__ "MatEqual_MPIAIJ" 1428 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag) 1429 { 1430 Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data; 1431 Mat a, b, c, d; 1432 PetscTruth flg; 1433 int ierr; 1434 1435 PetscFunctionBegin; 1436 if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1437 a = matA->A; b = matA->B; 1438 c = matB->A; d = matB->B; 1439 1440 ierr = MatEqual(a, c, &flg);CHKERRQ(ierr); 1441 if (flg == PETSC_TRUE) { 1442 ierr = MatEqual(b, d, 
&flg);CHKERRQ(ierr); 1443 } 1444 ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr); 1445 PetscFunctionReturn(0); 1446 } 1447 1448 #undef __FUNC__ 1449 #define __FUNC__ "MatCopy_MPIAIJ" 1450 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1451 { 1452 int ierr; 1453 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1454 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1455 1456 PetscFunctionBegin; 1457 if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) { 1458 /* because of the column compression in the off-processor part of the matrix a->B, 1459 the number of columns in a->B and b->B may be different, hence we cannot call 1460 the MatCopy() directly on the two parts. If need be, we can provide a more 1461 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1462 then copying the submatrices */ 1463 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1464 } else { 1465 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1466 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1467 } 1468 PetscFunctionReturn(0); 1469 } 1470 1471 extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1472 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1473 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1474 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatReuse,Mat **); 1475 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatReuse,Mat *); 1476 1477 /* -------------------------------------------------------------------*/ 1478 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1479 MatGetRow_MPIAIJ, 1480 MatRestoreRow_MPIAIJ, 1481 MatMult_MPIAIJ, 1482 MatMultAdd_MPIAIJ, 1483 MatMultTrans_MPIAIJ, 1484 MatMultTransAdd_MPIAIJ, 1485 0, 1486 0, 1487 0, 1488 0, 1489 0, 1490 0, 1491 MatRelax_MPIAIJ, 1492 MatTranspose_MPIAIJ, 1493 MatGetInfo_MPIAIJ, 1494 MatEqual_MPIAIJ, 1495 MatGetDiagonal_MPIAIJ, 1496 MatDiagonalScale_MPIAIJ, 1497 MatNorm_MPIAIJ, 1498 MatAssemblyBegin_MPIAIJ, 1499 MatAssemblyEnd_MPIAIJ, 1500 0, 1501 MatSetOption_MPIAIJ, 1502 MatZeroEntries_MPIAIJ, 1503 MatZeroRows_MPIAIJ, 1504 0, 1505 0, 1506 0, 1507 0, 1508 MatGetSize_MPIAIJ, 1509 MatGetLocalSize_MPIAIJ, 1510 MatGetOwnershipRange_MPIAIJ, 1511 0, 1512 0, 1513 0, 1514 0, 1515 MatDuplicate_MPIAIJ, 1516 0, 1517 0, 1518 0, 1519 0, 1520 0, 1521 MatGetSubMatrices_MPIAIJ, 1522 MatIncreaseOverlap_MPIAIJ, 1523 MatGetValues_MPIAIJ, 1524 MatCopy_MPIAIJ, 1525 MatPrintHelp_MPIAIJ, 1526 MatScale_MPIAIJ, 1527 0, 1528 0, 1529 0, 1530 MatGetBlockSize_MPIAIJ, 1531 0, 1532 0, 1533 0, 1534 0, 1535 MatFDColoringCreate_MPIAIJ, 1536 0, 1537 MatSetUnfactored_MPIAIJ, 1538 0, 1539 0, 1540 MatGetSubMatrix_MPIAIJ, 1541 0, 1542 0, 1543 MatGetMaps_Petsc}; 1544 1545 /* ----------------------------------------------------------------------------------------*/ 1546 1547 EXTERN_C_BEGIN 1548 #undef __FUNC__ 1549 #define __FUNC__ "MatStoreValues_MPIAIJ" 1550 int MatStoreValues_MPIAIJ(Mat mat) 1551 { 1552 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1553 int ierr; 1554 1555 PetscFunctionBegin; 1556 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1557 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1558 PetscFunctionReturn(0); 1559 } 1560 EXTERN_C_END 1561 1562 EXTERN_C_BEGIN 1563 #undef __FUNC__ 1564 #define __FUNC__ "MatRetrieveValues_MPIAIJ" 1565 int MatRetrieveValues_MPIAIJ(Mat mat) 1566 { 1567 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1568 int ierr; 1569 1570 PetscFunctionBegin; 1571 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1572 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1573 
  PetscFunctionReturn(0);
}
EXTERN_C_END

#include "pc.h"
EXTERN_C_BEGIN
extern int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
EXTERN_C_END

#undef __FUNC__
#define __FUNC__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given).
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given).
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.  For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  d_nz - number of nonzeros per row in the DIAGONAL portion of the local submatrix
          (the same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row),
           or PETSC_NULL if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of the local
          submatrix (the same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row), or PETSC_NULL if o_nz is used to specify the nonzero
           structure.  The size of this array is equal to the number
           of local rows, i.e., 'm'.

   Output Parameter:
.  A - the matrix

   Notes:
   The m,n,M,N parameters specify the size of the matrix and its partitioning across
   processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the values of the input parameter 'm'.
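   As a minimal sketch (not a complete program: the local size m, the global sizes
   M and N, the preallocation arrays d_nnz and o_nnz, and the row data are assumed
   to have been set up by the calling code, and error checking with CHKERRQ() is
   omitted), a typical creation and assembly sequence looks like
.vb
      Mat A;
      ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,m,PETSC_DECIDE,M,N,0,d_nnz,0,o_nnz,&A);
      /* insert the locally owned entries, one or more rows at a time */
      ierr = MatSetValues(A,1,&row,ncols,cols,values,INSERT_VALUES);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve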
   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor and r2 is the last row belonging
   to this processor.  This is a square mxm matrix.  The remaining columns of
   the m x N local submatrix constitute the OFF-DIAGONAL portion.

   If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
.  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!
-  -mat_mpi - use the parallel matrix data structures even on one processor
        (defaults to using the SeqAIJ format on one processor)

   Example usage:

   Consider the following 8x8 matrix with 34 nonzero values that is assembled
   across 3 processors.  Let's assume that proc0 owns 3 rows, proc1 owns 3 rows,
   and proc2 owns 2 rows.  This division can be shown as follows:

.vb
            1  2  0 |  0  3  0 |  0  4
    Proc0   0  5  6 |  7  0  0 |  8  0
            9  0 10 | 11  0  0 | 12  0
    -------------------------------------
           13  0 14 | 15 16 17 |  0  0
    Proc1   0 18  0 | 19 20 21 |  0  0
            0  0  0 | 22 23  0 | 24  0
    -------------------------------------
    Proc2  25 26 27 |  0  0 28 | 29  0
           30  0  0 | 31 32 33 |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Here the submatrices A,B,C are owned by proc0, D,E,F are owned by proc1,
   and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are the
   submatrices [A], [E], [I] respectively.  The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices.  For example, proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz storage
   locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros
   per local row in the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are thus allocating m*(d_nz+o_nz) storage locations on every processor.
   This translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and
   2*(1+4)=10 for proc2, i.e., we are using 12+15+10=37 storage locations
   to store 34 values.
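   As a rough sketch, the corresponding creation call on proc0 for this example,
   using the d_nz/o_nz form of preallocation (the other processes pass their own
   local sizes and preallocation values), would look like:
.vb
   ierr = MatCreateMPIAIJ(comm,3,3,8,8,2,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
.ve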
   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is exact.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat        B;
  Mat_MPIAIJ *b;
  int        ierr, i,size,flag1 = 0, flag2 = 0;

  PetscFunctionBegin;

  if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -2: value %d",d_nz);
  if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -2: value %d",o_nz);
  if (d_nnz) {
    for (i=0; i<m; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<m; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
    }
  }

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1);CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
  if (!flag1 && !flag2 && size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data = (void *) (b = PetscNew(Mat_MPIAIJ));CHKPTRQ(b);
  ierr    = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->ops->destroy = MatDestroy_MPIAIJ;
  B->ops->view    = MatView_MPIAIJ;
  B->factor       = 0;
  B->assembled    = PETSC_FALSE;
  B->mapping      = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }

  ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  /* the information in the maps duplicates the information computed below; eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
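  /* rowners[] and cowners[] (which share this single allocation) are filled below with
     prefix sums of the per-process local sizes, so that process p owns global rows
     rowners[p] .. rowners[p+1]-1 and, analogously, "diagonal" columns
     cowners[p] .. cowners[p+1]-1 */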
PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1808 b->cowners = b->rowners + b->size + 2; 1809 ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1810 b->rowners[0] = 0; 1811 for ( i=2; i<=b->size; i++ ) { 1812 b->rowners[i] += b->rowners[i-1]; 1813 } 1814 b->rstart = b->rowners[b->rank]; 1815 b->rend = b->rowners[b->rank+1]; 1816 ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1817 b->cowners[0] = 0; 1818 for ( i=2; i<=b->size; i++ ) { 1819 b->cowners[i] += b->cowners[i-1]; 1820 } 1821 b->cstart = b->cowners[b->rank]; 1822 b->cend = b->cowners[b->rank+1]; 1823 1824 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1825 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 1826 PLogObjectParent(B,b->A); 1827 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1828 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 1829 PLogObjectParent(B,b->B); 1830 1831 /* build cache for off array entries formed */ 1832 ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr); 1833 b->donotstash = 0; 1834 b->colmap = 0; 1835 b->garray = 0; 1836 b->roworiented = 1; 1837 1838 /* stuff used for matrix vector multiply */ 1839 b->lvec = 0; 1840 b->Mvctx = 0; 1841 1842 /* stuff for MatGetRow() */ 1843 b->rowindices = 0; 1844 b->rowvalues = 0; 1845 b->getrowactive = PETSC_FALSE; 1846 1847 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C", 1848 "MatStoreValues_MPIAIJ", 1849 (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1850 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C", 1851 "MatRetrieveValues_MPIAIJ", 1852 (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1853 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C", 1854 "MatGetDiagonalBlock_MPIAIJ", 1855 (void*)MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1856 *A = B; 1857 PetscFunctionReturn(0); 1858 } 1859 1860 #undef __FUNC__ 1861 #define __FUNC__ "MatDuplicate_MPIAIJ" 1862 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1863 { 1864 Mat mat; 1865 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1866 int ierr, len=0, flg; 1867 1868 PetscFunctionBegin; 1869 *newmat = 0; 1870 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView); 1871 PLogObjectCreate(mat); 1872 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ));CHKPTRQ(a); 1873 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1874 mat->ops->destroy = MatDestroy_MPIAIJ; 1875 mat->ops->view = MatView_MPIAIJ; 1876 mat->factor = matin->factor; 1877 mat->assembled = PETSC_TRUE; 1878 mat->insertmode = NOT_SET_VALUES; 1879 1880 a->m = mat->m = oldmat->m; 1881 a->n = mat->n = oldmat->n; 1882 a->M = mat->M = oldmat->M; 1883 a->N = mat->N = oldmat->N; 1884 1885 a->rstart = oldmat->rstart; 1886 a->rend = oldmat->rend; 1887 a->cstart = oldmat->cstart; 1888 a->cend = oldmat->cend; 1889 a->size = oldmat->size; 1890 a->rank = oldmat->rank; 1891 a->donotstash = oldmat->donotstash; 1892 a->roworiented = oldmat->roworiented; 1893 a->rowindices = 0; 1894 a->rowvalues = 0; 1895 a->getrowactive = PETSC_FALSE; 1896 1897 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners); 1898 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1899 a->cowners = a->rowners + a->size + 2; 1900 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1901 ierr = 
MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1902 if (oldmat->colmap) { 1903 #if defined (PETSC_USE_CTABLE) 1904 ierr = TableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1905 #else 1906 a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1907 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1908 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));CHKERRQ(ierr); 1909 #endif 1910 } else a->colmap = 0; 1911 if (oldmat->garray) { 1912 len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 1913 a->garray = (int *) PetscMalloc((len+1)*sizeof(int));CHKPTRQ(a->garray); 1914 PLogObjectMemory(mat,len*sizeof(int)); 1915 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1916 } else a->garray = 0; 1917 1918 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1919 PLogObjectParent(mat,a->lvec); 1920 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1921 PLogObjectParent(mat,a->Mvctx); 1922 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1923 PLogObjectParent(mat,a->A); 1924 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1925 PLogObjectParent(mat,a->B); 1926 ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 1927 if (flg) { 1928 ierr = MatPrintHelp(mat);CHKERRQ(ierr); 1929 } 1930 ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1931 *newmat = mat; 1932 PetscFunctionReturn(0); 1933 } 1934 1935 #include "sys.h" 1936 1937 #undef __FUNC__ 1938 #define __FUNC__ "MatLoad_MPIAIJ" 1939 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 1940 { 1941 Mat A; 1942 Scalar *vals,*svals; 1943 MPI_Comm comm = ((PetscObject)viewer)->comm; 1944 MPI_Status status; 1945 int i, nz, ierr, j,rstart, rend, fd; 1946 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1947 int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1948 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1949 1950 PetscFunctionBegin; 1951 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1952 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1953 if (!rank) { 1954 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1955 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1956 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 1957 if (header[3] < 0) { 1958 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ"); 1959 } 1960 } 1961 1962 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1963 M = header[1]; N = header[2]; 1964 /* determine ownership of all rows */ 1965 m = M/size + ((M % size) > rank); 1966 rowners = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners); 1967 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1968 rowners[0] = 0; 1969 for ( i=2; i<=size; i++ ) { 1970 rowners[i] += rowners[i-1]; 1971 } 1972 rstart = rowners[rank]; 1973 rend = rowners[rank+1]; 1974 1975 /* distribute row lengths to all processors */ 1976 ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens); 1977 offlens = ourlens + (rend-rstart); 1978 if (!rank) { 1979 rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths); 1980 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1981 sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts); 1982 for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1983 ierr = 
MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1984 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1985 } else { 1986 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr); 1987 } 1988 1989 if (!rank) { 1990 /* calculate the number of nonzeros on each processor */ 1991 procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz); 1992 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 1993 for ( i=0; i<size; i++ ) { 1994 for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1995 procsnz[i] += rowlengths[j]; 1996 } 1997 } 1998 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1999 2000 /* determine max buffer needed and allocate it */ 2001 maxnz = 0; 2002 for ( i=0; i<size; i++ ) { 2003 maxnz = PetscMax(maxnz,procsnz[i]); 2004 } 2005 cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols); 2006 2007 /* read in my part of the matrix column indices */ 2008 nz = procsnz[0]; 2009 mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols); 2010 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2011 2012 /* read in every one elses and ship off */ 2013 for ( i=1; i<size; i++ ) { 2014 nz = procsnz[i]; 2015 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2016 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2017 } 2018 ierr = PetscFree(cols);CHKERRQ(ierr); 2019 } else { 2020 /* determine buffer space needed for message */ 2021 nz = 0; 2022 for ( i=0; i<m; i++ ) { 2023 nz += ourlens[i]; 2024 } 2025 mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols); 2026 2027 /* receive message of column indices*/ 2028 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2029 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2030 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2031 } 2032 2033 /* determine column ownership if matrix is not square */ 2034 if (N != M) { 2035 n = N/size + ((N % size) > rank); 2036 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2037 cstart = cend - n; 2038 } else { 2039 cstart = rstart; 2040 cend = rend; 2041 n = cend - cstart; 2042 } 2043 2044 /* loop over local rows, determining number of off diagonal entries */ 2045 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 2046 jj = 0; 2047 for ( i=0; i<m; i++ ) { 2048 for ( j=0; j<ourlens[i]; j++ ) { 2049 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2050 jj++; 2051 } 2052 } 2053 2054 /* create our matrix */ 2055 for ( i=0; i<m; i++ ) { 2056 ourlens[i] -= offlens[i]; 2057 } 2058 ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 2059 A = *newmat; 2060 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2061 for ( i=0; i<m; i++ ) { 2062 ourlens[i] += offlens[i]; 2063 } 2064 2065 if (!rank) { 2066 vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals); 2067 2068 /* read in my part of the matrix numerical values */ 2069 nz = procsnz[0]; 2070 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2071 2072 /* insert into matrix */ 2073 jj = rstart; 2074 smycols = mycols; 2075 svals = vals; 2076 for ( i=0; i<m; i++ ) { 2077 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2078 smycols += ourlens[i]; 2079 svals += ourlens[i]; 2080 jj++; 2081 } 2082 2083 /* read in other processors and ship out */ 2084 for ( i=1; i<size; i++ ) { 2085 nz = procsnz[i]; 2086 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2087 ierr 
= MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2088 } 2089 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2090 } else { 2091 /* receive numeric values */ 2092 vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals); 2093 2094 /* receive message of values*/ 2095 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2096 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2097 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2098 2099 /* insert into matrix */ 2100 jj = rstart; 2101 smycols = mycols; 2102 svals = vals; 2103 for ( i=0; i<m; i++ ) { 2104 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2105 smycols += ourlens[i]; 2106 svals += ourlens[i]; 2107 jj++; 2108 } 2109 } 2110 ierr = PetscFree(ourlens);CHKERRQ(ierr); 2111 ierr = PetscFree(vals);CHKERRQ(ierr); 2112 ierr = PetscFree(mycols);CHKERRQ(ierr); 2113 ierr = PetscFree(rowners);CHKERRQ(ierr); 2114 2115 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2116 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2117 PetscFunctionReturn(0); 2118 } 2119 2120 #undef __FUNC__ 2121 #define __FUNC__ "MatGetSubMatrix_MPIAIJ" 2122 /* 2123 Not great since it makes two copies of the submatrix, first an SeqAIJ 2124 in local and then by concatenating the local matrices the end result. 2125 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2126 */ 2127 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 2128 { 2129 int ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j; 2130 Mat *local,M, Mreuse; 2131 Scalar *vwork,*aa; 2132 MPI_Comm comm = mat->comm; 2133 Mat_SeqAIJ *aij; 2134 int *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend; 2135 2136 PetscFunctionBegin; 2137 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2138 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2139 2140 if (call == MAT_REUSE_MATRIX) { 2141 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2142 if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse"); 2143 local = &Mreuse; 2144 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2145 } else { 2146 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2147 Mreuse = *local; 2148 ierr = PetscFree(local);CHKERRQ(ierr); 2149 } 2150 2151 /* 2152 m - number of local rows 2153 n - number of columns (same on all processors) 2154 rstart - first row in new global matrix generated 2155 */ 2156 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2157 if (call == MAT_INITIAL_MATRIX) { 2158 aij = (Mat_SeqAIJ *) (Mreuse)->data; 2159 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 2160 ii = aij->i; 2161 jj = aij->j; 2162 2163 /* 2164 Determine the number of non-zeros in the diagonal and off-diagonal 2165 portions of the matrix in order to do correct preallocation 2166 */ 2167 2168 /* first get start and end of "diagonal" columns */ 2169 if (csize == PETSC_DECIDE) { 2170 nlocal = n/size + ((n % size) > rank); 2171 } else { 2172 nlocal = csize; 2173 } 2174 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2175 rstart = rend - nlocal; 2176 if (rank == size - 1 && rend != n) { 2177 SETERRQ(1,1,"Local column sizes do not add up to total number of columns"); 2178 } 2179 2180 /* next, compute all the lengths */ 2181 dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens); 
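  /* dlens[] and olens[] share the single allocation above: for each local row i of the
     extracted matrix, dlens[i] counts the entries whose new column index falls in the
     "diagonal" range [rstart,rend) and olens[i] counts the rest; they are then passed
     as the d_nnz/o_nnz preallocation arrays to MatCreateMPIAIJ() below */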
2182 olens = dlens + m; 2183 for ( i=0; i<m; i++ ) { 2184 jend = ii[i+1] - ii[i]; 2185 olen = 0; 2186 dlen = 0; 2187 for ( j=0; j<jend; j++ ) { 2188 if ( *jj < rstart || *jj >= rend) olen++; 2189 else dlen++; 2190 jj++; 2191 } 2192 olens[i] = olen; 2193 dlens[i] = dlen; 2194 } 2195 ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 2196 ierr = PetscFree(dlens);CHKERRQ(ierr); 2197 } else { 2198 int ml,nl; 2199 2200 M = *newmat; 2201 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2202 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request"); 2203 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2204 /* 2205 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2206 rather than the slower MatSetValues(). 2207 */ 2208 M->was_assembled = PETSC_TRUE; 2209 M->assembled = PETSC_FALSE; 2210 } 2211 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2212 aij = (Mat_SeqAIJ *) (Mreuse)->data; 2213 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 2214 ii = aij->i; 2215 jj = aij->j; 2216 aa = aij->a; 2217 for (i=0; i<m; i++) { 2218 row = rstart + i; 2219 nz = ii[i+1] - ii[i]; 2220 cwork = jj; jj += nz; 2221 vwork = aa; aa += nz; 2222 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2223 } 2224 2225 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2226 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2227 *newmat = M; 2228 2229 /* save submatrix used in processor for next request */ 2230 if (call == MAT_INITIAL_MATRIX) { 2231 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2232 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2233 } 2234 2235 PetscFunctionReturn(0); 2236 } 2237
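/*
    Rough sketch of how MatGetSubMatrix_MPIAIJ() is normally reached through the
    public MatGetSubMatrix() interface.  This is an illustration only: the index
    sets isrow/iscol are assumed to have been built by the caller (for example
    with ISCreateStride()), and all cleanup and error handling is omitted.

        IS  isrow, iscol;
        Mat sub;

        ierr = ISCreateStride(comm,nlocalrows,firstrow,1,&isrow);CHKERRQ(ierr);
        ierr = ISCreateStride(comm,nlocalcols,firstcol,1,&iscol);CHKERRQ(ierr);
        ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);

        ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);

    On the second call the previously created submatrix is refilled in place; this
    relies on the "SubMatrix" object composed onto it at the end of the routine above.
*/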