1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: mpiaij.c,v 1.300 1999/09/15 02:05:29 bsmith Exp bsmith $"; 3 #endif 4 5 #include "src/mat/impls/aij/mpi/mpiaij.h" 6 #include "src/vec/vecimpl.h" 7 #include "src/inline/spops.h" 8 9 extern int MatSetUpMultiply_MPIAIJ(Mat); 10 extern int DisAssemble_MPIAIJ(Mat); 11 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 12 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 13 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 14 extern int MatPrintHelp_SeqAIJ(Mat); 15 16 /* local utility routine that creates a mapping from the global column 17 number to the local number in the off-diagonal part of the local 18 storage of the matrix. This is done in a non scable way since the 19 length of colmap equals the global matrix length. 20 */ 21 #undef __FUNC__ 22 #define __FUNC__ "CreateColmap_MPIAIJ_Private" 23 int CreateColmap_MPIAIJ_Private(Mat mat) 24 { 25 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 26 Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 27 int n = B->n,i,ierr; 28 29 PetscFunctionBegin; 30 #if defined (PETSC_USE_CTABLE) 31 ierr = TableCreate(aij->n/5,&aij->colmap);CHKERRQ(ierr); 32 for ( i=0; i<n; i++ ){ 33 ierr = TableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 34 } 35 #else 36 aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap); 37 PLogObjectMemory(mat,aij->N*sizeof(int)); 38 ierr = PetscMemzero(aij->colmap,aij->N*sizeof(int));CHKERRQ(ierr); 39 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 40 #endif 41 PetscFunctionReturn(0); 42 } 43 44 #define CHUNKSIZE 15 45 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 46 { \ 47 \ 48 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 49 rmax = aimax[row]; nrow = ailen[row]; \ 50 col1 = col - shift; \ 51 \ 52 low = 0; high = nrow; \ 53 while (high-low > 5) { \ 54 t = (low+high)/2; \ 55 if (rp[t] > col) high = t; \ 56 else low = t; \ 57 } \ 58 for ( _i=0; _i<nrow; _i++ ) { \ 59 if (rp[_i] > col1) break; \ 60 if (rp[_i] == col1) { \ 61 if (addv == ADD_VALUES) ap[_i] += value; \ 62 else ap[_i] = value; \ 63 goto a_noinsert; \ 64 } \ 65 } \ 66 if (nonew == 1) goto a_noinsert; \ 67 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 68 if (nrow >= rmax) { \ 69 /* there is no extra room in row, therefore enlarge */ \ 70 int new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \ 71 Scalar *new_a; \ 72 \ 73 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 74 \ 75 /* malloc new storage space */ \ 76 len = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \ 77 new_a = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \ 78 new_j = (int *) (new_a + new_nz); \ 79 new_i = new_j + new_nz; \ 80 \ 81 /* copy over old data into new slots */ \ 82 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \ 83 for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 84 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 85 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 86 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 87 len*sizeof(int));CHKERRQ(ierr); \ 88 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \ 89 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 90 len*sizeof(Scalar));CHKERRQ(ierr); \ 91 /* free up old matrix storage */ \ 92 \ 93 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 94 if (!a->singlemalloc) { \ 95 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 96 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 97 } \ 98 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 99 a->singlemalloc = 1; \ 100 \ 101 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 102 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 103 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 104 a->maxnz += CHUNKSIZE; \ 105 a->reallocs++; \ 106 } \ 107 N = nrow++ - 1; a->nz++; \ 108 /* shift up all the later entries in this row */ \ 109 for ( ii=N; ii>=_i; ii-- ) { \ 110 rp[ii+1] = rp[ii]; \ 111 ap[ii+1] = ap[ii]; \ 112 } \ 113 rp[_i] = col1; \ 114 ap[_i] = value; \ 115 a_noinsert: ; \ 116 ailen[row] = nrow; \ 117 } 118 119 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 120 { \ 121 \ 122 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 123 rmax = bimax[row]; nrow = bilen[row]; \ 124 col1 = col - shift; \ 125 \ 126 low = 0; high = nrow; \ 127 while (high-low > 5) { \ 128 t = (low+high)/2; \ 129 if (rp[t] > col) high = t; \ 130 else low = t; \ 131 } \ 132 for ( _i=0; _i<nrow; _i++ ) { \ 133 if (rp[_i] > col1) break; \ 134 if (rp[_i] == col1) { \ 135 if (addv == ADD_VALUES) ap[_i] += value; \ 136 else ap[_i] = value; \ 137 goto b_noinsert; \ 138 } \ 139 } \ 140 if (nonew == 1) goto b_noinsert; \ 141 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 142 if (nrow >= rmax) { \ 143 /* there is no extra room in row, therefore enlarge */ \ 144 int new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \ 145 Scalar *new_a; \ 146 \ 147 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 148 \ 149 /* malloc new storage space */ \ 150 len = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \ 151 new_a = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \ 152 new_j = (int *) (new_a + new_nz); \ 153 new_i = new_j + new_nz; \ 154 \ 155 /* copy over old data into new slots */ \ 156 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \ 157 for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 158 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 159 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 160 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 161 len*sizeof(int));CHKERRQ(ierr); \ 162 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \ 163 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 164 len*sizeof(Scalar));CHKERRQ(ierr); \ 165 /* free up old matrix storage */ \ 166 \ 167 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 168 if (!b->singlemalloc) { \ 169 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 170 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 171 } \ 172 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 173 b->singlemalloc = 1; \ 174 \ 175 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 176 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 177 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 178 b->maxnz += CHUNKSIZE; \ 179 b->reallocs++; \ 180 } \ 181 N = nrow++ - 1; b->nz++; \ 182 /* shift up all the later entries in this row */ \ 183 for ( ii=N; ii>=_i; ii-- ) { \ 184 rp[ii+1] = rp[ii]; \ 185 ap[ii+1] = ap[ii]; \ 186 } \ 187 rp[_i] = col1; \ 188 ap[_i] = value; \ 189 b_noinsert: ; \ 190 bilen[row] = nrow; \ 191 } 192 193 #undef __FUNC__ 194 #define __FUNC__ "MatSetValues_MPIAIJ" 195 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 196 { 197 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 198 Scalar value; 199 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 200 int cstart = aij->cstart, cend = aij->cend,row,col; 201 int roworiented = aij->roworiented; 202 203 /* Some Variables required in the macro */ 204 Mat A = aij->A; 205 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 206 int *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j; 207 Scalar *aa = a->a; 208 209 Mat B = aij->B; 210 Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data; 211 int *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j; 212 Scalar *ba = b->a; 213 214 int *rp,ii,nrow,_i,rmax, N, col1,low,high,t; 215 int nonew = a->nonew,shift = a->indexshift; 216 Scalar *ap; 217 218 PetscFunctionBegin; 219 for ( i=0; i<m; i++ ) { 220 if (im[i] < 0) continue; 221 #if defined(PETSC_USE_BOPT_g) 222 if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 223 #endif 224 if (im[i] >= rstart && im[i] < rend) { 225 row = im[i] - rstart; 226 for ( j=0; j<n; j++ ) { 227 if (in[j] >= cstart && in[j] < cend){ 228 col = in[j] - cstart; 229 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 230 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 231 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 232 } 233 else if (in[j] < 0) continue; 234 #if defined(PETSC_USE_BOPT_g) 235 else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");} 236 #endif 237 else { 238 if (mat->was_assembled) { 239 if (!aij->colmap) { 240 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 241 } 242 #if defined (PETSC_USE_CTABLE) 243 ierr = TableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 244 col--; 245 #else 246 col = aij->colmap[in[j]] - 1; 247 #endif 248 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 249 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 250 col = in[j]; 251 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 252 B = aij->B; 253 b = (Mat_SeqAIJ *) B->data; 254 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 255 ba = b->a; 256 } 257 } else col = in[j]; 258 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 259 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 260 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 261 } 262 } 263 } else { 264 if (!aij->donotstash) { 265 if (roworiented) { 266 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 267 } else { 268 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 269 } 270 } 271 } 272 } 273 PetscFunctionReturn(0); 274 } 275 276 #undef __FUNC__ 277 #define __FUNC__ "MatGetValues_MPIAIJ" 278 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 279 { 280 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 281 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 282 int cstart = aij->cstart, cend = aij->cend,row,col; 283 284 PetscFunctionBegin; 285 for ( i=0; i<m; i++ ) { 286 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 287 if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 288 if (idxm[i] >= rstart && idxm[i] < rend) { 289 row = idxm[i] - rstart; 290 for ( j=0; j<n; j++ ) { 291 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 292 if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 293 if (idxn[j] >= cstart && idxn[j] < cend){ 294 col = idxn[j] - cstart; 295 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 296 } else { 297 if (!aij->colmap) { 298 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 299 } 300 #if defined (PETSC_USE_CTABLE) 301 ierr = TableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 302 col --; 303 #else 304 col = aij->colmap[idxn[j]] - 1; 305 #endif 306 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 307 else { 308 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 309 } 310 } 311 } 312 } else { 313 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 314 } 315 } 316 PetscFunctionReturn(0); 317 } 318 319 #undef __FUNC__ 320 #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 321 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 322 { 323 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 324 int ierr,nstash,reallocs; 325 InsertMode addv; 326 327 PetscFunctionBegin; 328 if (aij->donotstash) { 329 PetscFunctionReturn(0); 330 } 331 332 /* make sure all processors are either in INSERTMODE or ADDMODE */ 333 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 334 if (addv == (ADD_VALUES|INSERT_VALUES)) { 335 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 336 } 337 mat->insertmode = addv; /* in case this processor had no cache */ 338 339 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 340 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 341 PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 342 PetscFunctionReturn(0); 343 } 344 345 346 #undef __FUNC__ 347 #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 348 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 349 { 350 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 351 int i,j,rstart,ncols,n,ierr,flg; 352 int *row,*col,other_disassembled; 353 Scalar *val; 354 InsertMode addv = mat->insertmode; 355 356 PetscFunctionBegin; 357 if (!aij->donotstash) { 358 while (1) { 359 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 360 if (!flg) break; 361 362 for ( i=0; i<n; ) { 363 /* Now identify the consecutive vals belonging to the same row */ 364 for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; } 365 if (j < n) ncols = j-i; 366 else ncols = n-i; 367 /* Now assemble all these values with a single function call */ 368 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 369 i = j; 370 } 371 } 372 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 373 } 374 375 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 376 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 377 378 /* determine if any processor has disassembled, if so we must 379 also disassemble ourselfs, in order that we may reassemble. */ 380 /* 381 if nonzero structure of submatrix B cannot change then we know that 382 no processor disassembled thus we can skip this stuff 383 */ 384 if (!((Mat_SeqAIJ*) aij->B->data)->nonew) { 385 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 386 if (mat->was_assembled && !other_disassembled) { 387 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 388 } 389 } 390 391 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 392 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 393 } 394 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 395 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 396 397 if (aij->rowvalues) { 398 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 399 aij->rowvalues = 0; 400 } 401 PetscFunctionReturn(0); 402 } 403 404 #undef __FUNC__ 405 #define __FUNC__ "MatZeroEntries_MPIAIJ" 406 int MatZeroEntries_MPIAIJ(Mat A) 407 { 408 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 409 int ierr; 410 411 PetscFunctionBegin; 412 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 413 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 414 PetscFunctionReturn(0); 415 } 416 417 #undef __FUNC__ 418 #define __FUNC__ "MatZeroRows_MPIAIJ" 419 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 420 { 421 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 422 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 423 int *procs,*nprocs,j,found,idx,nsends,*work,row; 424 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 425 int *rvalues,tag = A->tag,count,base,slen,n,*source; 426 int *lens,imdex,*lrows,*values,rstart=l->rstart; 427 MPI_Comm comm = A->comm; 428 MPI_Request *send_waits,*recv_waits; 429 MPI_Status recv_status,*send_status; 430 IS istmp; 431 432 PetscFunctionBegin; 433 ierr = ISGetSize(is,&N);CHKERRQ(ierr); 434 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 435 436 /* first count number of contributors to each processor */ 437 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs); 438 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 439 procs = nprocs + size; 440 owner = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/ 441 for ( i=0; i<N; i++ ) { 442 idx = rows[i]; 443 found = 0; 444 for ( j=0; j<size; j++ ) { 445 if (idx >= owners[j] && idx < owners[j+1]) { 446 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 447 } 448 } 449 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 450 } 451 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 452 453 /* inform other processors of number of messages and max length*/ 454 work = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work); 455 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 456 nrecvs = work[rank]; 457 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 458 nmax = work[rank]; 459 ierr = PetscFree(work);CHKERRQ(ierr); 460 461 /* post receives: */ 462 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 463 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 464 for ( i=0; i<nrecvs; i++ ) { 465 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 466 } 467 468 /* do sends: 469 1) starts[i] gives the starting index in svalues for stuff going to 470 the ith processor 471 */ 472 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues); 473 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 474 starts = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts); 475 starts[0] = 0; 476 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 477 for ( i=0; i<N; i++ ) { 478 svalues[starts[owner[i]]++] = rows[i]; 479 } 480 ISRestoreIndices(is,&rows); 481 482 starts[0] = 0; 483 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 484 count = 0; 485 for ( i=0; i<size; i++ ) { 486 if (procs[i]) { 487 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 488 } 489 } 490 ierr = PetscFree(starts);CHKERRQ(ierr); 491 492 base = owners[rank]; 493 494 /* wait on receives */ 495 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens); 496 source = lens + nrecvs; 497 count = nrecvs; slen = 0; 498 while (count) { 499 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 500 /* unpack receives into our local space */ 501 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 502 source[imdex] = recv_status.MPI_SOURCE; 503 lens[imdex] = n; 504 slen += n; 505 count--; 506 } 507 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 508 509 /* move the data into the send scatter */ 510 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows); 511 count = 0; 512 for ( i=0; i<nrecvs; i++ ) { 513 values = rvalues + i*nmax; 514 for ( j=0; j<lens[i]; j++ ) { 515 lrows[count++] = values[j] - base; 516 } 517 } 518 ierr = PetscFree(rvalues);CHKERRQ(ierr); 519 ierr = PetscFree(lens);CHKERRQ(ierr); 520 ierr = PetscFree(owner);CHKERRQ(ierr); 521 ierr = PetscFree(nprocs);CHKERRQ(ierr); 522 523 /* actually zap the local rows */ 524 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 525 PLogObjectParent(A,istmp); 526 527 /* 528 Zero the required rows. If the "diagonal block" of the matrix 529 is square and the user wishes to set the diagonal we use seperate 530 code so that MatSetValues() is not called for each diagonal allocating 531 new memory, thus calling lots of mallocs and slowing things down. 532 533 Contributed by: Mathew Knepley 534 */ 535 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 536 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 537 if (diag && (l->A->M == l->A->N)) { 538 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 539 } else if (diag) { 540 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 541 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 542 SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 543 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 544 } 545 for ( i = 0; i < slen; i++ ) { 546 row = lrows[i] + rstart; 547 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 548 } 549 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 550 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 551 } else { 552 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 553 } 554 ierr = ISDestroy(istmp);CHKERRQ(ierr); 555 ierr = PetscFree(lrows);CHKERRQ(ierr); 556 557 /* wait on sends */ 558 if (nsends) { 559 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 560 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 561 ierr = PetscFree(send_status);CHKERRQ(ierr); 562 } 563 ierr = PetscFree(send_waits);CHKERRQ(ierr); 564 ierr = PetscFree(svalues);CHKERRQ(ierr); 565 566 PetscFunctionReturn(0); 567 } 568 569 #undef __FUNC__ 570 #define __FUNC__ "MatMult_MPIAIJ" 571 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 572 { 573 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 574 int ierr,nt; 575 576 PetscFunctionBegin; 577 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 578 if (nt != a->n) { 579 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 580 } 581 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 582 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 583 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 584 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 585 PetscFunctionReturn(0); 586 } 587 588 #undef __FUNC__ 589 #define __FUNC__ "MatMultAdd_MPIAIJ" 590 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 591 { 592 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 593 int ierr; 594 595 PetscFunctionBegin; 596 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 597 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 598 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 599 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 600 PetscFunctionReturn(0); 601 } 602 603 #undef __FUNC__ 604 #define __FUNC__ "MatMultTrans_MPIAIJ" 605 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 606 { 607 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 608 int ierr; 609 610 PetscFunctionBegin; 611 /* do nondiagonal part */ 612 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr); 613 /* send it on its way */ 614 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 615 /* do local part */ 616 ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr); 617 /* receive remote parts: note this assumes the values are not actually */ 618 /* inserted in yy until the next line, which is true for my implementation*/ 619 /* but is not perhaps always true. */ 620 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 621 PetscFunctionReturn(0); 622 } 623 624 #undef __FUNC__ 625 #define __FUNC__ "MatMultTransAdd_MPIAIJ" 626 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 627 { 628 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 629 int ierr; 630 631 PetscFunctionBegin; 632 /* do nondiagonal part */ 633 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr); 634 /* send it on its way */ 635 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 636 /* do local part */ 637 ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 638 /* receive remote parts: note this assumes the values are not actually */ 639 /* inserted in yy until the next line, which is true for my implementation*/ 640 /* but is not perhaps always true. */ 641 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 642 PetscFunctionReturn(0); 643 } 644 645 /* 646 This only works correctly for square matrices where the subblock A->A is the 647 diagonal block 648 */ 649 #undef __FUNC__ 650 #define __FUNC__ "MatGetDiagonal_MPIAIJ" 651 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 652 { 653 int ierr; 654 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 655 656 PetscFunctionBegin; 657 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 658 if (a->rstart != a->cstart || a->rend != a->cend) { 659 SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition"); 660 } 661 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 662 PetscFunctionReturn(0); 663 } 664 665 #undef __FUNC__ 666 #define __FUNC__ "MatScale_MPIAIJ" 667 int MatScale_MPIAIJ(Scalar *aa,Mat A) 668 { 669 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 670 int ierr; 671 672 PetscFunctionBegin; 673 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 674 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 675 PetscFunctionReturn(0); 676 } 677 678 #undef __FUNC__ 679 #define __FUNC__ "MatDestroy_MPIAIJ" 680 int MatDestroy_MPIAIJ(Mat mat) 681 { 682 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 683 int ierr; 684 685 PetscFunctionBegin; 686 687 if (mat->mapping) { 688 ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr); 689 } 690 if (mat->bmapping) { 691 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr); 692 } 693 if (mat->rmap) { 694 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 695 } 696 if (mat->cmap) { 697 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 698 } 699 #if defined(PETSC_USE_LOG) 700 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N); 701 #endif 702 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 703 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 704 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 705 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 706 #if defined (PETSC_USE_CTABLE) 707 if (aij->colmap) TableDelete(aij->colmap); 708 #else 709 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 710 #endif 711 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 712 if (aij->lvec) VecDestroy(aij->lvec); 713 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 714 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 715 ierr = PetscFree(aij);CHKERRQ(ierr); 716 PLogObjectDestroy(mat); 717 PetscHeaderDestroy(mat); 718 PetscFunctionReturn(0); 719 } 720 721 #undef __FUNC__ 722 #define __FUNC__ "MatView_MPIAIJ_Binary" 723 int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 724 { 725 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 726 int ierr; 727 728 PetscFunctionBegin; 729 if (aij->size == 1) { 730 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 731 } 732 else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 733 PetscFunctionReturn(0); 734 } 735 736 #undef __FUNC__ 737 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworSocket" 738 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer) 739 { 740 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 741 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 742 int ierr, format,shift = C->indexshift,rank = aij->rank ; 743 int size = aij->size; 744 FILE *fd; 745 ViewerType vtype; 746 747 PetscFunctionBegin; 748 ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr); 749 if (PetscTypeCompare(vtype,ASCII_VIEWER)) { 750 ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 751 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 752 MatInfo info; 753 int flg; 754 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 755 ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr); 756 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 757 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 758 PetscSequentialPhaseBegin(mat->comm,1); 759 if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 760 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 761 else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 762 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 763 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 764 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 765 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 766 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 767 fflush(fd); 768 PetscSequentialPhaseEnd(mat->comm,1); 769 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 770 PetscFunctionReturn(0); 771 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 772 PetscFunctionReturn(0); 773 } 774 } else if (PetscTypeCompare(vtype,DRAW_VIEWER)) { 775 Draw draw; 776 PetscTruth isnull; 777 ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 778 ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 779 } 780 781 if (size == 1) { 782 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 783 } else { 784 /* assemble the entire matrix onto first processor. */ 785 Mat A; 786 Mat_SeqAIJ *Aloc; 787 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 788 Scalar *a; 789 790 if (!rank) { 791 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 792 } else { 793 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 794 } 795 PLogObjectParent(mat,A); 796 797 /* copy over the A part */ 798 Aloc = (Mat_SeqAIJ*) aij->A->data; 799 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 800 row = aij->rstart; 801 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 802 for ( i=0; i<m; i++ ) { 803 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 804 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 805 } 806 aj = Aloc->j; 807 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 808 809 /* copy over the B part */ 810 Aloc = (Mat_SeqAIJ*) aij->B->data; 811 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 812 row = aij->rstart; 813 ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) );CHKPTRQ(cols); 814 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 815 for ( i=0; i<m; i++ ) { 816 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 817 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 818 } 819 ierr = PetscFree(ct);CHKERRQ(ierr); 820 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 821 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 822 /* 823 Everyone has to call to draw the matrix since the graphics waits are 824 synchronized across all processors that share the Draw object 825 */ 826 if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) { 827 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer);CHKERRQ(ierr); 828 } 829 ierr = MatDestroy(A);CHKERRQ(ierr); 830 } 831 PetscFunctionReturn(0); 832 } 833 834 #undef __FUNC__ 835 #define __FUNC__ "MatView_MPIAIJ" 836 int MatView_MPIAIJ(Mat mat,Viewer viewer) 837 { 838 int ierr; 839 ViewerType vtype; 840 841 PetscFunctionBegin; 842 ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr); 843 if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) || 844 PetscTypeCompare(vtype,SOCKET_VIEWER) || PetscTypeCompare(vtype,BINARY_VIEWER)) { 845 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 846 /* 847 } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) { 848 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 849 */ 850 } else { 851 SETERRQ(1,1,"Viewer type not supported by PETSc object"); 852 } 853 PetscFunctionReturn(0); 854 } 855 856 /* 857 This has to provide several versions. 858 859 2) a) use only local smoothing updating outer values only once. 860 b) local smoothing updating outer values each inner iteration 861 3) color updating out values betwen colors. 862 */ 863 #undef __FUNC__ 864 #define __FUNC__ "MatRelax_MPIAIJ" 865 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 866 double fshift,int its,Vec xx) 867 { 868 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 869 Mat AA = mat->A, BB = mat->B; 870 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 871 Scalar *b,*x,*xs,*ls,d,*v,sum; 872 int ierr,*idx, *diag; 873 int n = mat->n, m = mat->m, i,shift = A->indexshift; 874 875 PetscFunctionBegin; 876 if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA);CHKERRQ(ierr);} 877 diag = A->diag; 878 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 879 if (flag & SOR_ZERO_INITIAL_GUESS) { 880 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 881 PetscFunctionReturn(0); 882 } 883 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 884 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 885 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 886 if (xx != bb) { 887 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 888 } else { 889 b = x; 890 } 891 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 892 xs = x + shift; /* shift by one for index start of 1 */ 893 ls = ls + shift; 894 while (its--) { 895 /* go down through the rows */ 896 for ( i=0; i<m; i++ ) { 897 n = A->i[i+1] - A->i[i]; 898 PLogFlops(4*n+3); 899 idx = A->j + A->i[i] + shift; 900 v = A->a + A->i[i] + shift; 901 sum = b[i]; 902 SPARSEDENSEMDOT(sum,xs,v,idx,n); 903 d = fshift + A->a[diag[i]+shift]; 904 n = B->i[i+1] - B->i[i]; 905 idx = B->j + B->i[i] + shift; 906 v = B->a + B->i[i] + shift; 907 SPARSEDENSEMDOT(sum,ls,v,idx,n); 908 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 909 } 910 /* come up through the rows */ 911 for ( i=m-1; i>-1; i-- ) { 912 n = A->i[i+1] - A->i[i]; 913 PLogFlops(4*n+3) 914 idx = A->j + A->i[i] + shift; 915 v = A->a + A->i[i] + shift; 916 sum = b[i]; 917 SPARSEDENSEMDOT(sum,xs,v,idx,n); 918 d = fshift + A->a[diag[i]+shift]; 919 n = B->i[i+1] - B->i[i]; 920 idx = B->j + B->i[i] + shift; 921 v = B->a + B->i[i] + shift; 922 SPARSEDENSEMDOT(sum,ls,v,idx,n); 923 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 924 } 925 } 926 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 927 if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); } 928 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 929 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 930 if (flag & SOR_ZERO_INITIAL_GUESS) { 931 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 932 PetscFunctionReturn(0); 933 } 934 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 935 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 936 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 937 if (xx != bb) { 938 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 939 } else { 940 b = x; 941 } 942 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 943 xs = x + shift; /* shift by one for index start of 1 */ 944 ls = ls + shift; 945 while (its--) { 946 for ( i=0; i<m; i++ ) { 947 n = A->i[i+1] - A->i[i]; 948 PLogFlops(4*n+3); 949 idx = A->j + A->i[i] + shift; 950 v = A->a + A->i[i] + shift; 951 sum = b[i]; 952 SPARSEDENSEMDOT(sum,xs,v,idx,n); 953 d = fshift + A->a[diag[i]+shift]; 954 n = B->i[i+1] - B->i[i]; 955 idx = B->j + B->i[i] + shift; 956 v = B->a + B->i[i] + shift; 957 SPARSEDENSEMDOT(sum,ls,v,idx,n); 958 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 959 } 960 } 961 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 962 if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); } 963 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 964 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 965 if (flag & SOR_ZERO_INITIAL_GUESS) { 966 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 967 PetscFunctionReturn(0); 968 } 969 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 970 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 971 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 972 if (xx != bb) { 973 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 974 } else { 975 b = x; 976 } 977 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 978 xs = x + shift; /* shift by one for index start of 1 */ 979 ls = ls + shift; 980 while (its--) { 981 for ( i=m-1; i>-1; i-- ) { 982 n = A->i[i+1] - A->i[i]; 983 PLogFlops(4*n+3); 984 idx = A->j + A->i[i] + shift; 985 v = A->a + A->i[i] + shift; 986 sum = b[i]; 987 SPARSEDENSEMDOT(sum,xs,v,idx,n); 988 d = fshift + A->a[diag[i]+shift]; 989 n = B->i[i+1] - B->i[i]; 990 idx = B->j + B->i[i] + shift; 991 v = B->a + B->i[i] + shift; 992 SPARSEDENSEMDOT(sum,ls,v,idx,n); 993 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 994 } 995 } 996 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 997 if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); } 998 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 999 } else { 1000 SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported"); 1001 } 1002 PetscFunctionReturn(0); 1003 } 1004 1005 #undef __FUNC__ 1006 #define __FUNC__ "MatGetInfo_MPIAIJ" 1007 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1008 { 1009 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1010 Mat A = mat->A, B = mat->B; 1011 int ierr; 1012 double isend[5], irecv[5]; 1013 1014 PetscFunctionBegin; 1015 info->block_size = 1.0; 1016 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1017 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1018 isend[3] = info->memory; isend[4] = info->mallocs; 1019 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1020 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1021 isend[3] += info->memory; isend[4] += info->mallocs; 1022 if (flag == MAT_LOCAL) { 1023 info->nz_used = isend[0]; 1024 info->nz_allocated = isend[1]; 1025 info->nz_unneeded = isend[2]; 1026 info->memory = isend[3]; 1027 info->mallocs = isend[4]; 1028 } else if (flag == MAT_GLOBAL_MAX) { 1029 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1030 info->nz_used = irecv[0]; 1031 info->nz_allocated = irecv[1]; 1032 info->nz_unneeded = irecv[2]; 1033 info->memory = irecv[3]; 1034 info->mallocs = irecv[4]; 1035 } else if (flag == MAT_GLOBAL_SUM) { 1036 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1037 info->nz_used = irecv[0]; 1038 info->nz_allocated = irecv[1]; 1039 info->nz_unneeded = irecv[2]; 1040 info->memory = irecv[3]; 1041 info->mallocs = irecv[4]; 1042 } 1043 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1044 info->fill_ratio_needed = 0; 1045 info->factor_mallocs = 0; 1046 info->rows_global = (double)mat->M; 1047 info->columns_global = (double)mat->N; 1048 info->rows_local = (double)mat->m; 1049 info->columns_local = (double)mat->N; 1050 1051 PetscFunctionReturn(0); 1052 } 1053 1054 #undef __FUNC__ 1055 #define __FUNC__ "MatSetOption_MPIAIJ" 1056 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1057 { 1058 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1059 int ierr; 1060 1061 PetscFunctionBegin; 1062 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1063 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1064 op == MAT_COLUMNS_UNSORTED || 1065 op == MAT_COLUMNS_SORTED || 1066 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1067 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1068 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1069 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1070 } else if (op == MAT_ROW_ORIENTED) { 1071 a->roworiented = 1; 1072 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1073 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1074 } else if (op == MAT_ROWS_SORTED || 1075 op == MAT_ROWS_UNSORTED || 1076 op == MAT_SYMMETRIC || 1077 op == MAT_STRUCTURALLY_SYMMETRIC || 1078 op == MAT_YES_NEW_DIAGONALS) 1079 PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1080 else if (op == MAT_COLUMN_ORIENTED) { 1081 a->roworiented = 0; 1082 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1083 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1084 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1085 a->donotstash = 1; 1086 } else if (op == MAT_NO_NEW_DIAGONALS){ 1087 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1088 } else { 1089 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1090 } 1091 PetscFunctionReturn(0); 1092 } 1093 1094 #undef __FUNC__ 1095 #define __FUNC__ "MatGetSize_MPIAIJ" 1096 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1097 { 1098 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1099 1100 PetscFunctionBegin; 1101 if (m) *m = mat->M; 1102 if (n) *n = mat->N; 1103 PetscFunctionReturn(0); 1104 } 1105 1106 #undef __FUNC__ 1107 #define __FUNC__ "MatGetLocalSize_MPIAIJ" 1108 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1109 { 1110 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1111 1112 PetscFunctionBegin; 1113 if (m) *m = mat->m; 1114 if (n) *n = mat->n; 1115 PetscFunctionReturn(0); 1116 } 1117 1118 #undef __FUNC__ 1119 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" 1120 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1121 { 1122 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1123 1124 PetscFunctionBegin; 1125 *m = mat->rstart; *n = mat->rend; 1126 PetscFunctionReturn(0); 1127 } 1128 1129 #undef __FUNC__ 1130 #define __FUNC__ "MatGetRow_MPIAIJ" 1131 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1132 { 1133 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1134 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1135 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1136 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1137 int *cmap, *idx_p; 1138 1139 PetscFunctionBegin; 1140 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1141 mat->getrowactive = PETSC_TRUE; 1142 1143 if (!mat->rowvalues && (idx || v)) { 1144 /* 1145 allocate enough space to hold information from the longest row. 1146 */ 1147 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1148 int max = 1,m = mat->m,tmp; 1149 for ( i=0; i<m; i++ ) { 1150 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1151 if (max < tmp) { max = tmp; } 1152 } 1153 mat->rowvalues = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 1154 mat->rowindices = (int *) (mat->rowvalues + max); 1155 } 1156 1157 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows") 1158 lrow = row - rstart; 1159 1160 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1161 if (!v) {pvA = 0; pvB = 0;} 1162 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1163 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1164 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1165 nztot = nzA + nzB; 1166 1167 cmap = mat->garray; 1168 if (v || idx) { 1169 if (nztot) { 1170 /* Sort by increasing column numbers, assuming A and B already sorted */ 1171 int imark = -1; 1172 if (v) { 1173 *v = v_p = mat->rowvalues; 1174 for ( i=0; i<nzB; i++ ) { 1175 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1176 else break; 1177 } 1178 imark = i; 1179 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1180 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1181 } 1182 if (idx) { 1183 *idx = idx_p = mat->rowindices; 1184 if (imark > -1) { 1185 for ( i=0; i<imark; i++ ) { 1186 idx_p[i] = cmap[cworkB[i]]; 1187 } 1188 } else { 1189 for ( i=0; i<nzB; i++ ) { 1190 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1191 else break; 1192 } 1193 imark = i; 1194 } 1195 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 1196 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 1197 } 1198 } else { 1199 if (idx) *idx = 0; 1200 if (v) *v = 0; 1201 } 1202 } 1203 *nz = nztot; 1204 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1205 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1206 PetscFunctionReturn(0); 1207 } 1208 1209 #undef __FUNC__ 1210 #define __FUNC__ "MatRestoreRow_MPIAIJ" 1211 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1212 { 1213 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1214 1215 PetscFunctionBegin; 1216 if (aij->getrowactive == PETSC_FALSE) { 1217 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1218 } 1219 aij->getrowactive = PETSC_FALSE; 1220 PetscFunctionReturn(0); 1221 } 1222 1223 #undef __FUNC__ 1224 #define __FUNC__ "MatNorm_MPIAIJ" 1225 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1226 { 1227 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1228 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1229 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1230 double sum = 0.0; 1231 Scalar *v; 1232 1233 PetscFunctionBegin; 1234 if (aij->size == 1) { 1235 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1236 } else { 1237 if (type == NORM_FROBENIUS) { 1238 v = amat->a; 1239 for (i=0; i<amat->nz; i++ ) { 1240 #if defined(PETSC_USE_COMPLEX) 1241 sum += PetscReal(PetscConj(*v)*(*v)); v++; 1242 #else 1243 sum += (*v)*(*v); v++; 1244 #endif 1245 } 1246 v = bmat->a; 1247 for (i=0; i<bmat->nz; i++ ) { 1248 #if defined(PETSC_USE_COMPLEX) 1249 sum += PetscReal(PetscConj(*v)*(*v)); v++; 1250 #else 1251 sum += (*v)*(*v); v++; 1252 #endif 1253 } 1254 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1255 *norm = sqrt(*norm); 1256 } else if (type == NORM_1) { /* max column norm */ 1257 double *tmp, *tmp2; 1258 int *jj, *garray = aij->garray; 1259 tmp = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp); 1260 tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp2); 1261 ierr = PetscMemzero(tmp,aij->N*sizeof(double));CHKERRQ(ierr); 1262 *norm = 0.0; 1263 v = amat->a; jj = amat->j; 1264 for ( j=0; j<amat->nz; j++ ) { 1265 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1266 } 1267 v = bmat->a; jj = bmat->j; 1268 for ( j=0; j<bmat->nz; j++ ) { 1269 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1270 } 1271 ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1272 for ( j=0; j<aij->N; j++ ) { 1273 if (tmp2[j] > *norm) *norm = tmp2[j]; 1274 } 1275 ierr = PetscFree(tmp);CHKERRQ(ierr); 1276 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1277 } else if (type == NORM_INFINITY) { /* max row norm */ 1278 double ntemp = 0.0; 1279 for ( j=0; j<amat->m; j++ ) { 1280 v = amat->a + amat->i[j] + shift; 1281 sum = 0.0; 1282 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1283 sum += PetscAbsScalar(*v); v++; 1284 } 1285 v = bmat->a + bmat->i[j] + shift; 1286 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1287 sum += PetscAbsScalar(*v); v++; 1288 } 1289 if (sum > ntemp) ntemp = sum; 1290 } 1291 ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr); 1292 } else { 1293 SETERRQ(PETSC_ERR_SUP,0,"No support for two norm"); 1294 } 1295 } 1296 PetscFunctionReturn(0); 1297 } 1298 1299 #undef __FUNC__ 1300 #define __FUNC__ "MatTranspose_MPIAIJ" 1301 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1302 { 1303 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1304 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1305 int ierr,shift = Aloc->indexshift; 1306 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1307 Mat B; 1308 Scalar *array; 1309 1310 PetscFunctionBegin; 1311 if (matout == PETSC_NULL && M != N) { 1312 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1313 } 1314 1315 ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1316 1317 /* copy over the A part */ 1318 Aloc = (Mat_SeqAIJ*) a->A->data; 1319 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1320 row = a->rstart; 1321 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1322 for ( i=0; i<m; i++ ) { 1323 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1324 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1325 } 1326 aj = Aloc->j; 1327 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1328 1329 /* copy over the B part */ 1330 Aloc = (Mat_SeqAIJ*) a->B->data; 1331 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1332 row = a->rstart; 1333 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) );CHKPTRQ(cols); 1334 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1335 for ( i=0; i<m; i++ ) { 1336 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1337 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1338 } 1339 ierr = PetscFree(ct);CHKERRQ(ierr); 1340 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1341 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1342 if (matout != PETSC_NULL) { 1343 *matout = B; 1344 } else { 1345 PetscOps *Abops; 1346 struct _MatOps *Aops; 1347 1348 /* This isn't really an in-place transpose .... but free data structures from a */ 1349 ierr = PetscFree(a->rowners);CHKERRQ(ierr); 1350 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1351 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1352 #if defined (PETSC_USE_CTABLE) 1353 if (a->colmap) TableDelete(a->colmap); 1354 #else 1355 if (a->colmap) {ierr = PetscFree(a->colmap);CHKERRQ(ierr);} 1356 #endif 1357 if (a->garray) {ierr = PetscFree(a->garray);CHKERRQ(ierr);} 1358 if (a->lvec) VecDestroy(a->lvec); 1359 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1360 ierr = PetscFree(a);CHKERRQ(ierr); 1361 1362 /* 1363 This is horrible, horrible code. We need to keep the 1364 A pointers for the bops and ops but copy everything 1365 else from C. 1366 */ 1367 Abops = A->bops; 1368 Aops = A->ops; 1369 ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr); 1370 A->bops = Abops; 1371 A->ops = Aops; 1372 PetscHeaderDestroy(B); 1373 } 1374 PetscFunctionReturn(0); 1375 } 1376 1377 #undef __FUNC__ 1378 #define __FUNC__ "MatDiagonalScale_MPIAIJ" 1379 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1380 { 1381 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1382 Mat a = aij->A, b = aij->B; 1383 int ierr,s1,s2,s3; 1384 1385 PetscFunctionBegin; 1386 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1387 if (rr) { 1388 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1389 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 1390 /* Overlap communication with computation. */ 1391 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1392 } 1393 if (ll) { 1394 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1395 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1396 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1397 } 1398 /* scale the diagonal block */ 1399 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1400 1401 if (rr) { 1402 /* Do a scatter end and then right scale the off-diagonal block */ 1403 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1404 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1405 } 1406 1407 PetscFunctionReturn(0); 1408 } 1409 1410 1411 #undef __FUNC__ 1412 #define __FUNC__ "MatPrintHelp_MPIAIJ" 1413 int MatPrintHelp_MPIAIJ(Mat A) 1414 { 1415 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1416 int ierr; 1417 1418 PetscFunctionBegin; 1419 if (!a->rank) { 1420 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1421 } 1422 PetscFunctionReturn(0); 1423 } 1424 1425 #undef __FUNC__ 1426 #define __FUNC__ "MatGetBlockSize_MPIAIJ" 1427 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1428 { 1429 PetscFunctionBegin; 1430 *bs = 1; 1431 PetscFunctionReturn(0); 1432 } 1433 #undef __FUNC__ 1434 #define __FUNC__ "MatSetUnfactored_MPIAIJ" 1435 int MatSetUnfactored_MPIAIJ(Mat A) 1436 { 1437 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1438 int ierr; 1439 1440 PetscFunctionBegin; 1441 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1442 PetscFunctionReturn(0); 1443 } 1444 1445 #undef __FUNC__ 1446 #define __FUNC__ "MatEqual_MPIAIJ" 1447 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag) 1448 { 1449 Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data; 1450 Mat a, b, c, d; 1451 PetscTruth flg; 1452 int ierr; 1453 1454 PetscFunctionBegin; 1455 if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1456 a = matA->A; b = matA->B; 1457 c = matB->A; d = matB->B; 1458 1459 ierr = MatEqual(a, c, &flg);CHKERRQ(ierr); 1460 if (flg == PETSC_TRUE) { 1461 ierr = MatEqual(b, d, &flg);CHKERRQ(ierr); 1462 } 1463 ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr); 1464 PetscFunctionReturn(0); 1465 } 1466 1467 #undef __FUNC__ 1468 #define __FUNC__ "MatCopy_MPIAIJ" 1469 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1470 { 1471 int ierr; 1472 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1473 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1474 1475 PetscFunctionBegin; 1476 if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) { 1477 /* because of the column compression in the off-processor part of the matrix a->B, 1478 the number of columns in a->B and b->B may be different, hence we cannot call 1479 the MatCopy() directly on the two parts. If need be, we can provide a more 1480 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1481 then copying the submatrices */ 1482 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1483 } else { 1484 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1485 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1486 } 1487 PetscFunctionReturn(0); 1488 } 1489 1490 extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1491 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1492 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1493 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatReuse,Mat **); 1494 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatReuse,Mat *); 1495 1496 /* -------------------------------------------------------------------*/ 1497 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1498 MatGetRow_MPIAIJ, 1499 MatRestoreRow_MPIAIJ, 1500 MatMult_MPIAIJ, 1501 MatMultAdd_MPIAIJ, 1502 MatMultTrans_MPIAIJ, 1503 MatMultTransAdd_MPIAIJ, 1504 0, 1505 0, 1506 0, 1507 0, 1508 0, 1509 0, 1510 MatRelax_MPIAIJ, 1511 MatTranspose_MPIAIJ, 1512 MatGetInfo_MPIAIJ, 1513 MatEqual_MPIAIJ, 1514 MatGetDiagonal_MPIAIJ, 1515 MatDiagonalScale_MPIAIJ, 1516 MatNorm_MPIAIJ, 1517 MatAssemblyBegin_MPIAIJ, 1518 MatAssemblyEnd_MPIAIJ, 1519 0, 1520 MatSetOption_MPIAIJ, 1521 MatZeroEntries_MPIAIJ, 1522 MatZeroRows_MPIAIJ, 1523 0, 1524 0, 1525 0, 1526 0, 1527 MatGetSize_MPIAIJ, 1528 MatGetLocalSize_MPIAIJ, 1529 MatGetOwnershipRange_MPIAIJ, 1530 0, 1531 0, 1532 0, 1533 0, 1534 MatDuplicate_MPIAIJ, 1535 0, 1536 0, 1537 0, 1538 0, 1539 0, 1540 MatGetSubMatrices_MPIAIJ, 1541 MatIncreaseOverlap_MPIAIJ, 1542 MatGetValues_MPIAIJ, 1543 MatCopy_MPIAIJ, 1544 MatPrintHelp_MPIAIJ, 1545 MatScale_MPIAIJ, 1546 0, 1547 0, 1548 0, 1549 MatGetBlockSize_MPIAIJ, 1550 0, 1551 0, 1552 0, 1553 0, 1554 MatFDColoringCreate_MPIAIJ, 1555 0, 1556 MatSetUnfactored_MPIAIJ, 1557 0, 1558 0, 1559 MatGetSubMatrix_MPIAIJ, 1560 0, 1561 0, 1562 MatGetMaps_Petsc}; 1563 1564 /* ----------------------------------------------------------------------------------------*/ 1565 1566 EXTERN_C_BEGIN 1567 #undef __FUNC__ 1568 #define __FUNC__ "MatStoreValues_MPIAIJ" 1569 int MatStoreValues_MPIAIJ(Mat mat) 1570 { 1571 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1572 int ierr; 1573 1574 PetscFunctionBegin; 1575 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1576 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1577 PetscFunctionReturn(0); 1578 } 1579 EXTERN_C_END 1580 1581 EXTERN_C_BEGIN 1582 #undef __FUNC__ 1583 #define __FUNC__ "MatRetrieveValues_MPIAIJ" 1584 int MatRetrieveValues_MPIAIJ(Mat mat) 1585 { 1586 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1587 int ierr; 1588 1589 PetscFunctionBegin; 1590 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1591 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1592 PetscFunctionReturn(0); 1593 } 1594 EXTERN_C_END 1595 1596 #include "pc.h" 1597 EXTERN_C_BEGIN 1598 extern int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *); 1599 EXTERN_C_END 1600 1601 #undef __FUNC__ 1602 #define __FUNC__ "MatCreateMPIAIJ" 1603 /*@C 1604 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1605 (the default parallel PETSc format). For good matrix assembly performance 1606 the user should preallocate the matrix storage by setting the parameters 1607 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1608 performance can be increased by more than a factor of 50. 1609 1610 Collective on MPI_Comm 1611 1612 Input Parameters: 1613 + comm - MPI communicator 1614 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1615 This value should be the same as the local size used in creating the 1616 y vector for the matrix-vector product y = Ax. 1617 . n - This value should be the same as the local size used in creating the 1618 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 1619 calculated if N is given) For square matrices n is almost always m. 1620 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1621 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1622 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 1623 (same value is used for all local rows) 1624 . d_nnz - array containing the number of nonzeros in the various rows of the 1625 DIAGONAL portion of the local submatrix (possibly different for each row) 1626 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 1627 The size of this array is equal to the number of local rows, i.e 'm'. 1628 You must leave room for the diagonal entry even if it is zero. 1629 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 1630 submatrix (same value is used for all local rows). 1631 - o_nnz - array containing the number of nonzeros in the various rows of the 1632 OFF-DIAGONAL portion of the local submatrix (possibly different for 1633 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 1634 structure. The size of this array is equal to the number 1635 of local rows, i.e 'm'. 1636 1637 Output Parameter: 1638 . A - the matrix 1639 1640 Notes: 1641 m,n,M,N parameters specify the size of the matrix, and its partitioning across 1642 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 1643 storage requirements for this matrix. 1644 1645 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 1646 processor than it must be used on all processors that share the object for 1647 that argument. 1648 1649 The AIJ format (also called the Yale sparse matrix format or 1650 compressed row storage), is fully compatible with standard Fortran 77 1651 storage. That is, the stored row and column indices can begin at 1652 either one (as in Fortran) or zero. See the users manual for details. 1653 1654 The user MUST specify either the local or global matrix dimensions 1655 (possibly both). 1656 1657 The parallel matrix is partitioned such that the first m0 rows belong to 1658 process 0, the next m1 rows belong to process 1, the next m2 rows belong 1659 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 1660 1661 The DIAGONAL portion of the local submatrix of a processor can be defined 1662 as the submatrix which is obtained by extraction the part corresponding 1663 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 1664 first row that belongs to the processor, and r2 is the last row belonging 1665 to the this processor. This is a square mxm matrix. The remaining portion 1666 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 1667 1668 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 1669 1670 By default, this format uses inodes (identical nodes) when possible. 1671 We search for consecutive rows with the same nonzero structure, thereby 1672 reusing matrix information to achieve increased efficiency. 1673 1674 Options Database Keys: 1675 + -mat_aij_no_inode - Do not use inodes 1676 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 1677 - -mat_aij_oneindex - Internally use indexing starting at 1 1678 rather than 0. Note that when calling MatSetValues(), 1679 the user still MUST index entries starting at 0! 1680 . -mat_mpi - use the parallel matrix data structures even on one processor 1681 (defaults to using SeqBAIJ format on one processor) 1682 . -mat_mpi - use the parallel matrix data structures even on one processor 1683 (defaults to using SeqAIJ format on one processor) 1684 1685 1686 Example usage: 1687 1688 Consider the following 8x8 matrix with 34 non-zero values, that is 1689 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 1690 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 1691 as follows: 1692 1693 .vb 1694 1 2 0 | 0 3 0 | 0 4 1695 Proc0 0 5 6 | 7 0 0 | 8 0 1696 9 0 10 | 11 0 0 | 12 0 1697 ------------------------------------- 1698 13 0 14 | 15 16 17 | 0 0 1699 Proc1 0 18 0 | 19 20 21 | 0 0 1700 0 0 0 | 22 23 0 | 24 0 1701 ------------------------------------- 1702 Proc2 25 26 27 | 0 0 28 | 29 0 1703 30 0 0 | 31 32 33 | 0 34 1704 .ve 1705 1706 This can be represented as a collection of submatrices as: 1707 1708 .vb 1709 A B C 1710 D E F 1711 G H I 1712 .ve 1713 1714 Where the submatrices A,B,C are owned by proc0, D,E,F are 1715 owned by proc1, G,H,I are owned by proc2. 1716 1717 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 1718 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 1719 The 'M','N' parameters are 8,8, and have the same values on all procs. 1720 1721 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 1722 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 1723 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 1724 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 1725 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 1726 matrix, ans [DF] as another SeqAIJ matrix. 1727 1728 When d_nz, o_nz parameters are specified, d_nz storage elements are 1729 allocated for every row of the local diagonal submatrix, and o_nz 1730 storage locations are allocated for every row of the OFF-DIAGONAL submat. 1731 One way to choose d_nz and o_nz is to use the max nonzerors per local 1732 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 1733 In this case, the values of d_nz,o_nz are: 1734 .vb 1735 proc0 : dnz = 2, o_nz = 2 1736 proc1 : dnz = 3, o_nz = 2 1737 proc2 : dnz = 1, o_nz = 4 1738 .ve 1739 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 1740 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 1741 for proc3. i.e we are using 12+15+10=37 storage locations to store 1742 34 values. 1743 1744 When d_nnz, o_nnz parameters are specified, the storage is specified 1745 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 1746 In the above case the values for d_nnz,o_nnz are: 1747 .vb 1748 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 1749 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 1750 proc2: d_nnz = [1,1] and o_nnz = [4,4] 1751 .ve 1752 Here the space allocated is sum of all the above values i.e 34, and 1753 hence pre-allocation is perfect. 1754 1755 Level: intermediate 1756 1757 .keywords: matrix, aij, compressed row, sparse, parallel 1758 1759 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1760 @*/ 1761 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1762 { 1763 Mat B; 1764 Mat_MPIAIJ *b; 1765 int ierr, i,size,flag1 = 0, flag2 = 0; 1766 1767 PetscFunctionBegin; 1768 1769 if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -2: value %d",d_nz); 1770 if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -2: value %d",o_nz); 1771 if (d_nnz) { 1772 for (i=0; i<m; i++) { 1773 if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]); 1774 } 1775 } 1776 if (o_nnz) { 1777 for (i=0; i<m; i++) { 1778 if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]); 1779 } 1780 } 1781 1782 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1783 ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1);CHKERRQ(ierr); 1784 ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr); 1785 if (!flag1 && !flag2 && size == 1) { 1786 if (M == PETSC_DECIDE) M = m; 1787 if (N == PETSC_DECIDE) N = n; 1788 ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A);CHKERRQ(ierr); 1789 PetscFunctionReturn(0); 1790 } 1791 1792 *A = 0; 1793 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView); 1794 PLogObjectCreate(B); 1795 B->data = (void *) (b = PetscNew(Mat_MPIAIJ));CHKPTRQ(b); 1796 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1797 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1798 B->ops->destroy = MatDestroy_MPIAIJ; 1799 B->ops->view = MatView_MPIAIJ; 1800 B->factor = 0; 1801 B->assembled = PETSC_FALSE; 1802 B->mapping = 0; 1803 1804 B->insertmode = NOT_SET_VALUES; 1805 b->size = size; 1806 ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr); 1807 1808 if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 1809 SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1810 } 1811 1812 ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr); 1813 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 1814 b->m = m; B->m = m; 1815 b->n = n; B->n = n; 1816 b->N = N; B->N = N; 1817 b->M = M; B->M = M; 1818 1819 /* the information in the maps duplicates the information computed below, eventually 1820 we should remove the duplicate information that is not contained in the maps */ 1821 ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr); 1822 ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr); 1823 1824 /* build local table of row and column ownerships */ 1825 b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners); 1826 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1827 b->cowners = b->rowners + b->size + 2; 1828 ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1829 b->rowners[0] = 0; 1830 for ( i=2; i<=b->size; i++ ) { 1831 b->rowners[i] += b->rowners[i-1]; 1832 } 1833 b->rstart = b->rowners[b->rank]; 1834 b->rend = b->rowners[b->rank+1]; 1835 ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1836 b->cowners[0] = 0; 1837 for ( i=2; i<=b->size; i++ ) { 1838 b->cowners[i] += b->cowners[i-1]; 1839 } 1840 b->cstart = b->cowners[b->rank]; 1841 b->cend = b->cowners[b->rank+1]; 1842 1843 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1844 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 1845 PLogObjectParent(B,b->A); 1846 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1847 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 1848 PLogObjectParent(B,b->B); 1849 1850 /* build cache for off array entries formed */ 1851 ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr); 1852 b->donotstash = 0; 1853 b->colmap = 0; 1854 b->garray = 0; 1855 b->roworiented = 1; 1856 1857 /* stuff used for matrix vector multiply */ 1858 b->lvec = 0; 1859 b->Mvctx = 0; 1860 1861 /* stuff for MatGetRow() */ 1862 b->rowindices = 0; 1863 b->rowvalues = 0; 1864 b->getrowactive = PETSC_FALSE; 1865 1866 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C", 1867 "MatStoreValues_MPIAIJ", 1868 (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1869 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C", 1870 "MatRetrieveValues_MPIAIJ", 1871 (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1872 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C", 1873 "MatGetDiagonalBlock_MPIAIJ", 1874 (void*)MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1875 *A = B; 1876 PetscFunctionReturn(0); 1877 } 1878 1879 #undef __FUNC__ 1880 #define __FUNC__ "MatDuplicate_MPIAIJ" 1881 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1882 { 1883 Mat mat; 1884 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1885 int ierr, len=0, flg; 1886 1887 PetscFunctionBegin; 1888 *newmat = 0; 1889 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView); 1890 PLogObjectCreate(mat); 1891 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ));CHKPTRQ(a); 1892 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1893 mat->ops->destroy = MatDestroy_MPIAIJ; 1894 mat->ops->view = MatView_MPIAIJ; 1895 mat->factor = matin->factor; 1896 mat->assembled = PETSC_TRUE; 1897 mat->insertmode = NOT_SET_VALUES; 1898 1899 a->m = mat->m = oldmat->m; 1900 a->n = mat->n = oldmat->n; 1901 a->M = mat->M = oldmat->M; 1902 a->N = mat->N = oldmat->N; 1903 1904 a->rstart = oldmat->rstart; 1905 a->rend = oldmat->rend; 1906 a->cstart = oldmat->cstart; 1907 a->cend = oldmat->cend; 1908 a->size = oldmat->size; 1909 a->rank = oldmat->rank; 1910 a->donotstash = oldmat->donotstash; 1911 a->roworiented = oldmat->roworiented; 1912 a->rowindices = 0; 1913 a->rowvalues = 0; 1914 a->getrowactive = PETSC_FALSE; 1915 1916 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners); 1917 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1918 a->cowners = a->rowners + a->size + 2; 1919 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1920 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1921 if (oldmat->colmap) { 1922 #if defined (PETSC_USE_CTABLE) 1923 ierr = TableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1924 #else 1925 a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1926 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1927 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));CHKERRQ(ierr); 1928 #endif 1929 } else a->colmap = 0; 1930 if (oldmat->garray) { 1931 len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 1932 a->garray = (int *) PetscMalloc((len+1)*sizeof(int));CHKPTRQ(a->garray); 1933 PLogObjectMemory(mat,len*sizeof(int)); 1934 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1935 } else a->garray = 0; 1936 1937 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1938 PLogObjectParent(mat,a->lvec); 1939 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1940 PLogObjectParent(mat,a->Mvctx); 1941 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1942 PLogObjectParent(mat,a->A); 1943 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1944 PLogObjectParent(mat,a->B); 1945 ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 1946 if (flg) { 1947 ierr = MatPrintHelp(mat);CHKERRQ(ierr); 1948 } 1949 ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1950 *newmat = mat; 1951 PetscFunctionReturn(0); 1952 } 1953 1954 #include "sys.h" 1955 1956 #undef __FUNC__ 1957 #define __FUNC__ "MatLoad_MPIAIJ" 1958 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 1959 { 1960 Mat A; 1961 Scalar *vals,*svals; 1962 MPI_Comm comm = ((PetscObject)viewer)->comm; 1963 MPI_Status status; 1964 int i, nz, ierr, j,rstart, rend, fd; 1965 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1966 int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1967 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1968 1969 PetscFunctionBegin; 1970 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1971 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1972 if (!rank) { 1973 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1974 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1975 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 1976 if (header[3] < 0) { 1977 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ"); 1978 } 1979 } 1980 1981 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1982 M = header[1]; N = header[2]; 1983 /* determine ownership of all rows */ 1984 m = M/size + ((M % size) > rank); 1985 rowners = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners); 1986 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1987 rowners[0] = 0; 1988 for ( i=2; i<=size; i++ ) { 1989 rowners[i] += rowners[i-1]; 1990 } 1991 rstart = rowners[rank]; 1992 rend = rowners[rank+1]; 1993 1994 /* distribute row lengths to all processors */ 1995 ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens); 1996 offlens = ourlens + (rend-rstart); 1997 if (!rank) { 1998 rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths); 1999 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2000 sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts); 2001 for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 2002 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 2003 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2004 } else { 2005 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr); 2006 } 2007 2008 if (!rank) { 2009 /* calculate the number of nonzeros on each processor */ 2010 procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz); 2011 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2012 for ( i=0; i<size; i++ ) { 2013 for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 2014 procsnz[i] += rowlengths[j]; 2015 } 2016 } 2017 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2018 2019 /* determine max buffer needed and allocate it */ 2020 maxnz = 0; 2021 for ( i=0; i<size; i++ ) { 2022 maxnz = PetscMax(maxnz,procsnz[i]); 2023 } 2024 cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols); 2025 2026 /* read in my part of the matrix column indices */ 2027 nz = procsnz[0]; 2028 mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols); 2029 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2030 2031 /* read in every one elses and ship off */ 2032 for ( i=1; i<size; i++ ) { 2033 nz = procsnz[i]; 2034 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2035 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2036 } 2037 ierr = PetscFree(cols);CHKERRQ(ierr); 2038 } else { 2039 /* determine buffer space needed for message */ 2040 nz = 0; 2041 for ( i=0; i<m; i++ ) { 2042 nz += ourlens[i]; 2043 } 2044 mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols); 2045 2046 /* receive message of column indices*/ 2047 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2048 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2049 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2050 } 2051 2052 /* determine column ownership if matrix is not square */ 2053 if (N != M) { 2054 n = N/size + ((N % size) > rank); 2055 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2056 cstart = cend - n; 2057 } else { 2058 cstart = rstart; 2059 cend = rend; 2060 n = cend - cstart; 2061 } 2062 2063 /* loop over local rows, determining number of off diagonal entries */ 2064 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 2065 jj = 0; 2066 for ( i=0; i<m; i++ ) { 2067 for ( j=0; j<ourlens[i]; j++ ) { 2068 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2069 jj++; 2070 } 2071 } 2072 2073 /* create our matrix */ 2074 for ( i=0; i<m; i++ ) { 2075 ourlens[i] -= offlens[i]; 2076 } 2077 ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 2078 A = *newmat; 2079 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2080 for ( i=0; i<m; i++ ) { 2081 ourlens[i] += offlens[i]; 2082 } 2083 2084 if (!rank) { 2085 vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals); 2086 2087 /* read in my part of the matrix numerical values */ 2088 nz = procsnz[0]; 2089 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2090 2091 /* insert into matrix */ 2092 jj = rstart; 2093 smycols = mycols; 2094 svals = vals; 2095 for ( i=0; i<m; i++ ) { 2096 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2097 smycols += ourlens[i]; 2098 svals += ourlens[i]; 2099 jj++; 2100 } 2101 2102 /* read in other processors and ship out */ 2103 for ( i=1; i<size; i++ ) { 2104 nz = procsnz[i]; 2105 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2106 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2107 } 2108 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2109 } else { 2110 /* receive numeric values */ 2111 vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals); 2112 2113 /* receive message of values*/ 2114 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2115 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2116 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2117 2118 /* insert into matrix */ 2119 jj = rstart; 2120 smycols = mycols; 2121 svals = vals; 2122 for ( i=0; i<m; i++ ) { 2123 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2124 smycols += ourlens[i]; 2125 svals += ourlens[i]; 2126 jj++; 2127 } 2128 } 2129 ierr = PetscFree(ourlens);CHKERRQ(ierr); 2130 ierr = PetscFree(vals);CHKERRQ(ierr); 2131 ierr = PetscFree(mycols);CHKERRQ(ierr); 2132 ierr = PetscFree(rowners);CHKERRQ(ierr); 2133 2134 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2135 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2136 PetscFunctionReturn(0); 2137 } 2138 2139 #undef __FUNC__ 2140 #define __FUNC__ "MatGetSubMatrix_MPIAIJ" 2141 /* 2142 Not great since it makes two copies of the submatrix, first an SeqAIJ 2143 in local and then by concatenating the local matrices the end result. 2144 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2145 */ 2146 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 2147 { 2148 int ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j; 2149 Mat *local,M, Mreuse; 2150 Scalar *vwork,*aa; 2151 MPI_Comm comm = mat->comm; 2152 Mat_SeqAIJ *aij; 2153 int *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend; 2154 2155 PetscFunctionBegin; 2156 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2157 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2158 2159 if (call == MAT_REUSE_MATRIX) { 2160 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2161 if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse"); 2162 local = &Mreuse; 2163 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2164 } else { 2165 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2166 Mreuse = *local; 2167 ierr = PetscFree(local);CHKERRQ(ierr); 2168 } 2169 2170 /* 2171 m - number of local rows 2172 n - number of columns (same on all processors) 2173 rstart - first row in new global matrix generated 2174 */ 2175 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2176 if (call == MAT_INITIAL_MATRIX) { 2177 aij = (Mat_SeqAIJ *) (Mreuse)->data; 2178 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 2179 ii = aij->i; 2180 jj = aij->j; 2181 2182 /* 2183 Determine the number of non-zeros in the diagonal and off-diagonal 2184 portions of the matrix in order to do correct preallocation 2185 */ 2186 2187 /* first get start and end of "diagonal" columns */ 2188 if (csize == PETSC_DECIDE) { 2189 nlocal = n/size + ((n % size) > rank); 2190 } else { 2191 nlocal = csize; 2192 } 2193 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2194 rstart = rend - nlocal; 2195 if (rank == size - 1 && rend != n) { 2196 SETERRQ(1,1,"Local column sizes do not add up to total number of columns"); 2197 } 2198 2199 /* next, compute all the lengths */ 2200 dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens); 2201 olens = dlens + m; 2202 for ( i=0; i<m; i++ ) { 2203 jend = ii[i+1] - ii[i]; 2204 olen = 0; 2205 dlen = 0; 2206 for ( j=0; j<jend; j++ ) { 2207 if ( *jj < rstart || *jj >= rend) olen++; 2208 else dlen++; 2209 jj++; 2210 } 2211 olens[i] = olen; 2212 dlens[i] = dlen; 2213 } 2214 ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 2215 ierr = PetscFree(dlens);CHKERRQ(ierr); 2216 } else { 2217 int ml,nl; 2218 2219 M = *newmat; 2220 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2221 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request"); 2222 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2223 /* 2224 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2225 rather than the slower MatSetValues(). 2226 */ 2227 M->was_assembled = PETSC_TRUE; 2228 M->assembled = PETSC_FALSE; 2229 } 2230 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2231 aij = (Mat_SeqAIJ *) (Mreuse)->data; 2232 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 2233 ii = aij->i; 2234 jj = aij->j; 2235 aa = aij->a; 2236 for (i=0; i<m; i++) { 2237 row = rstart + i; 2238 nz = ii[i+1] - ii[i]; 2239 cwork = jj; jj += nz; 2240 vwork = aa; aa += nz; 2241 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2242 } 2243 2244 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2245 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2246 *newmat = M; 2247 2248 /* save submatrix used in processor for next request */ 2249 if (call == MAT_INITIAL_MATRIX) { 2250 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2251 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2252 } 2253 2254 PetscFunctionReturn(0); 2255 } 2256