1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: mpiaij.c,v 1.295 1999/06/08 22:55:49 balay Exp balay $"; 3 #endif 4 5 #include "src/mat/impls/aij/mpi/mpiaij.h" 6 #include "src/vec/vecimpl.h" 7 #include "src/inline/spops.h" 8 9 extern int MatSetUpMultiply_MPIAIJ(Mat); 10 extern int DisAssemble_MPIAIJ(Mat); 11 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 12 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 13 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 14 extern int MatPrintHelp_SeqAIJ(Mat); 15 16 /* local utility routine that creates a mapping from the global column 17 number to the local number in the off-diagonal part of the local 18 storage of the matrix. This is done in a non scable way since the 19 length of colmap equals the global matrix length. 20 */ 21 #undef __FUNC__ 22 #define __FUNC__ "CreateColmap_MPIAIJ_Private" 23 int CreateColmap_MPIAIJ_Private(Mat mat) 24 { 25 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 26 Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 27 int n = B->n,i,ierr; 28 29 PetscFunctionBegin; 30 #if defined (PETSC_USE_CTABLE) 31 ierr = TableCreate(aij->n/5,&aij->colmap);CHKERRQ(ierr); 32 for ( i=0; i<n; i++ ){ 33 ierr = TableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 34 } 35 #else 36 aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap); 37 PLogObjectMemory(mat,aij->N*sizeof(int)); 38 ierr = PetscMemzero(aij->colmap,aij->N*sizeof(int));CHKERRQ(ierr); 39 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 40 #endif 41 PetscFunctionReturn(0); 42 } 43 44 #define CHUNKSIZE 15 45 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 46 { \ 47 \ 48 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 49 rmax = aimax[row]; nrow = ailen[row]; \ 50 col1 = col - shift; \ 51 \ 52 low = 0; high = nrow; \ 53 while (high-low > 5) { \ 54 t = (low+high)/2; \ 55 if (rp[t] > col) high = t; \ 56 else low = t; \ 57 } \ 58 for ( _i=0; _i<nrow; _i++ ) { \ 
59 if (rp[_i] > col1) break; \ 60 if (rp[_i] == col1) { \ 61 if (addv == ADD_VALUES) ap[_i] += value; \ 62 else ap[_i] = value; \ 63 goto a_noinsert; \ 64 } \ 65 } \ 66 if (nonew == 1) goto a_noinsert; \ 67 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 68 if (nrow >= rmax) { \ 69 /* there is no extra room in row, therefore enlarge */ \ 70 int new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \ 71 Scalar *new_a; \ 72 \ 73 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 74 \ 75 /* malloc new storage space */ \ 76 len = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \ 77 new_a = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \ 78 new_j = (int *) (new_a + new_nz); \ 79 new_i = new_j + new_nz; \ 80 \ 81 /* copy over old data into new slots */ \ 82 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \ 83 for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 84 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 85 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 86 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 87 len*sizeof(int));CHKERRQ(ierr); \ 88 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \ 89 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 90 len*sizeof(Scalar));CHKERRQ(ierr); \ 91 /* free up old matrix storage */ \ 92 \ 93 PetscFree(a->a); \ 94 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 95 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 96 a->singlemalloc = 1; \ 97 \ 98 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 99 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 100 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 101 a->maxnz += CHUNKSIZE; \ 102 a->reallocs++; \ 103 } \ 104 N = nrow++ - 1; a->nz++; \ 105 /* shift up all the later entries in this row */ \ 106 
for ( ii=N; ii>=_i; ii-- ) { \ 107 rp[ii+1] = rp[ii]; \ 108 ap[ii+1] = ap[ii]; \ 109 } \ 110 rp[_i] = col1; \ 111 ap[_i] = value; \ 112 a_noinsert: ; \ 113 ailen[row] = nrow; \ 114 } 115 116 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 117 { \ 118 \ 119 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 120 rmax = bimax[row]; nrow = bilen[row]; \ 121 col1 = col - shift; \ 122 \ 123 low = 0; high = nrow; \ 124 while (high-low > 5) { \ 125 t = (low+high)/2; \ 126 if (rp[t] > col) high = t; \ 127 else low = t; \ 128 } \ 129 for ( _i=0; _i<nrow; _i++ ) { \ 130 if (rp[_i] > col1) break; \ 131 if (rp[_i] == col1) { \ 132 if (addv == ADD_VALUES) ap[_i] += value; \ 133 else ap[_i] = value; \ 134 goto b_noinsert; \ 135 } \ 136 } \ 137 if (nonew == 1) goto b_noinsert; \ 138 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 139 if (nrow >= rmax) { \ 140 /* there is no extra room in row, therefore enlarge */ \ 141 int new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \ 142 Scalar *new_a; \ 143 \ 144 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 145 \ 146 /* malloc new storage space */ \ 147 len = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \ 148 new_a = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \ 149 new_j = (int *) (new_a + new_nz); \ 150 new_i = new_j + new_nz; \ 151 \ 152 /* copy over old data into new slots */ \ 153 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \ 154 for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 155 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 156 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 157 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 158 len*sizeof(int));CHKERRQ(ierr); \ 159 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \ 160 ierr = 
PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 161 len*sizeof(Scalar));CHKERRQ(ierr); \ 162 /* free up old matrix storage */ \ 163 \ 164 PetscFree(b->a); \ 165 if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 166 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 167 b->singlemalloc = 1; \ 168 \ 169 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 170 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 171 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 172 b->maxnz += CHUNKSIZE; \ 173 b->reallocs++; \ 174 } \ 175 N = nrow++ - 1; b->nz++; \ 176 /* shift up all the later entries in this row */ \ 177 for ( ii=N; ii>=_i; ii-- ) { \ 178 rp[ii+1] = rp[ii]; \ 179 ap[ii+1] = ap[ii]; \ 180 } \ 181 rp[_i] = col1; \ 182 ap[_i] = value; \ 183 b_noinsert: ; \ 184 bilen[row] = nrow; \ 185 } 186 187 #undef __FUNC__ 188 #define __FUNC__ "MatSetValues_MPIAIJ" 189 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 190 { 191 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 192 Scalar value; 193 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 194 int cstart = aij->cstart, cend = aij->cend,row,col; 195 int roworiented = aij->roworiented; 196 197 /* Some Variables required in the macro */ 198 Mat A = aij->A; 199 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 200 int *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j; 201 Scalar *aa = a->a; 202 203 Mat B = aij->B; 204 Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data; 205 int *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j; 206 Scalar *ba = b->a; 207 208 int *rp,ii,nrow,_i,rmax, N, col1,low,high,t; 209 int nonew = a->nonew,shift = a->indexshift; 210 Scalar *ap; 211 212 PetscFunctionBegin; 213 for ( i=0; i<m; i++ ) { 214 if (im[i] < 0) continue; 215 #if defined(PETSC_USE_BOPT_g) 216 if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 217 #endif 218 if (im[i] >= rstart && im[i] < rend) { 219 row = im[i] - rstart; 220 for ( 
j=0; j<n; j++ ) {
        if (in[j] >= cstart && in[j] < cend){
          /* column falls in the diagonal block: use local column index */
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
#endif
        else {
          /* off-diagonal block: map the global column to B's local numbering */
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = TableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;   /* colmap stores local+1; 0 meant "not present" */
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              /* a brand-new off-processor column: revert B to global
                 numbering (DisAssemble) so the insertion can proceed */
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ *) B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];   /* not yet assembled: B still uses global columns */
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* row owned by another processor: stash it for assembly-time communication */
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIAIJ"
/*
   Retrieve entries of the parallel matrix.  Only rows owned by this
   processor may be queried; off-diagonal columns are looked up through
   colmap/garray, and entries not stored locally are returned as 0.0.
*/
int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          /* diagonal block */
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal block: translate global -> local column */
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = TableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col --;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* column not stored locally -> the entry is a structural zero */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIAIJ"
/*
   Begin assembly: agree (collectively) on the insert mode and start
   scattering stashed off-processor entries to their owners.
*/
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  int         ierr,nstash,reallocs;
  InsertMode  addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}


#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIAIJ"
/*
   Finish assembly: drain the stash (off-processor entries sent during
   MatAssemblyBegin), assemble the local A and B blocks, and build the
   parallel multiply data structures on final assembly.
*/
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  int         i,j,rstart,ncols,n,ierr,flg;
  int         *row,*col,other_disassembled;
  Scalar      *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    /* receive and insert every message of stashed entries */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*) aij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: set up the scatter for the parallel multiply */
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  /* cached row workspace (from MatGetRow) is stale after reassembly */
  if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIAIJ"
/* Zero all stored entries of both the diagonal and off-diagonal blocks. */
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIAIJ"
/*
   Zero the global rows listed in is, optionally placing *diag on the
   diagonal.  Rows may be owned by other processors, so the indices are
   first routed to their owners with point-to-point messages.
*/
int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,rstart=l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
  ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
procs = nprocs + size; 431 owner = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/ 432 for ( i=0; i<N; i++ ) { 433 idx = rows[i]; 434 found = 0; 435 for ( j=0; j<size; j++ ) { 436 if (idx >= owners[j] && idx < owners[j+1]) { 437 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 438 } 439 } 440 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 441 } 442 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 443 444 /* inform other processors of number of messages and max length*/ 445 work = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work); 446 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 447 nrecvs = work[rank]; 448 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 449 nmax = work[rank]; 450 PetscFree(work); 451 452 /* post receives: */ 453 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 454 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 455 for ( i=0; i<nrecvs; i++ ) { 456 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 457 } 458 459 /* do sends: 460 1) starts[i] gives the starting index in svalues for stuff going to 461 the ith processor 462 */ 463 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues); 464 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 465 starts = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts); 466 starts[0] = 0; 467 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 468 for ( i=0; i<N; i++ ) { 469 svalues[starts[owner[i]]++] = rows[i]; 470 } 471 ISRestoreIndices(is,&rows); 472 473 starts[0] = 0; 474 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 475 count = 0; 476 for ( i=0; i<size; i++ ) { 477 if (procs[i]) { 478 ierr = 
MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 479 } 480 } 481 PetscFree(starts); 482 483 base = owners[rank]; 484 485 /* wait on receives */ 486 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens); 487 source = lens + nrecvs; 488 count = nrecvs; slen = 0; 489 while (count) { 490 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 491 /* unpack receives into our local space */ 492 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 493 source[imdex] = recv_status.MPI_SOURCE; 494 lens[imdex] = n; 495 slen += n; 496 count--; 497 } 498 PetscFree(recv_waits); 499 500 /* move the data into the send scatter */ 501 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows); 502 count = 0; 503 for ( i=0; i<nrecvs; i++ ) { 504 values = rvalues + i*nmax; 505 for ( j=0; j<lens[i]; j++ ) { 506 lrows[count++] = values[j] - base; 507 } 508 } 509 PetscFree(rvalues); PetscFree(lens); 510 PetscFree(owner); PetscFree(nprocs); 511 512 /* actually zap the local rows */ 513 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 514 PLogObjectParent(A,istmp); 515 516 /* 517 Zero the required rows. If the "diagonal block" of the matrix 518 is square and the user wishes to set the diagonal we use seperate 519 code so that MatSetValues() is not called for each diagonal allocating 520 new memory, thus calling lots of mallocs and slowing things down. 
521 522 Contributed by: Mathew Knepley 523 */ 524 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 525 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 526 if (diag && (l->A->M == l->A->N)) { 527 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 528 } else if (diag) { 529 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 530 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 531 SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 532 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 533 } 534 for ( i = 0; i < slen; i++ ) { 535 row = lrows[i] + rstart; 536 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 537 } 538 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 539 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 540 } else { 541 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 542 } 543 ierr = ISDestroy(istmp);CHKERRQ(ierr); 544 PetscFree(lrows); 545 546 /* wait on sends */ 547 if (nsends) { 548 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 549 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 550 PetscFree(send_status); 551 } 552 PetscFree(send_waits); PetscFree(svalues); 553 554 PetscFunctionReturn(0); 555 } 556 557 #undef __FUNC__ 558 #define __FUNC__ "MatMult_MPIAIJ" 559 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 560 { 561 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 562 int ierr,nt; 563 564 PetscFunctionBegin; 565 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 566 if (nt != a->n) { 567 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 568 } 569 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 570 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 571 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 572 ierr = 
         (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIAIJ"
/* z = y + A x, with the same scatter/compute overlap as MatMult. */
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIAIJ"
/* y = A^T x: reverse-scatter the off-diagonal contribution back to owners. */
int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIAIJ"
/* z = y + A^T x, same communication pattern as MatMultTrans. */
int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIAIJ"
int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  PetscFunctionBegin;
  if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatScale_MPIAIJ"
/* Scale both local blocks by *aa. */
int MatScale_MPIAIJ(Scalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ \
"MatDestroy_MPIAIJ"
/* Release all storage of the parallel matrix (blocks, colmap, scatter). */
int MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  PetscFunctionBegin;
  if (--mat->refct > 0) PetscFunctionReturn(0);   /* still referenced elsewhere */

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
  }
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
#if defined(PETSC_USE_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  PetscFree(aij->rowners);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) TableDelete(aij->colmap);
#else
  if (aij->colmap) PetscFree(aij->colmap);
#endif
  if (aij->garray) PetscFree(aij->garray);
  if (aij->lvec)   VecDestroy(aij->lvec);
  if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
  if (aij->rowvalues) PetscFree(aij->rowvalues);
  PetscFree(aij);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ_Binary"
/* Binary save; currently only the uniprocessor case is handled. */
int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  }
  else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworSocket"
/*
   View the parallel matrix.  INFO formats print per-processor
   statistics; otherwise the whole matrix is gathered onto processor 0
   and viewed there (all processors participate for Draw viewers).
*/
int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
  int         ierr, format,shift = C->indexshift,rank = aij->rank ;
  int         size = aij->size;
  FILE        *fd;
  ViewerType  vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr);
  if (PetscTypeCompare(vtype,ASCII_VIEWER)) {
    ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      /* one processor at a time prints its local statistics */
      MatInfo info;
      int flg;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
      PetscSequentialPhaseBegin(mat->comm,1);
      if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
         rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
         rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
      fflush(fd);
      PetscSequentialPhaseEnd(mat->comm,1);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (PetscTypeCompare(vtype,DRAW_VIEWER)) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    int        M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
    Scalar     *a;

    if (!rank) {
      /* processor 0 gets the whole matrix, everyone else zero rows */
      ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*) aij->A->data;
    m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    /* temporarily convert local to global column numbers ... */
    for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    /* ... and restore them afterwards */
    for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*) aij->B->data;
    m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) );CHKPTRQ(cols);
    /* translate B's compressed column numbers via garray */
    for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    PetscFree(ct);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the Draw object
    */
    if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) {
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer);CHKERRQ(ierr);
    }
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ"
int MatView_MPIAIJ(Mat mat,Viewer viewer)
{
  int        ierr;
  ViewerType vtype;

PetscFunctionBegin; 831 ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr); 832 if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) || 833 PetscTypeCompare(vtype,SOCKET_VIEWER) || PetscTypeCompare(vtype,BINARY_VIEWER)) { 834 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 835 /* 836 } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) { 837 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 838 */ 839 } else { 840 SETERRQ(1,1,"Viewer type not supported by PETSc object"); 841 } 842 PetscFunctionReturn(0); 843 } 844 845 /* 846 This has to provide several versions. 847 848 2) a) use only local smoothing updating outer values only once. 849 b) local smoothing updating outer values each inner iteration 850 3) color updating out values betwen colors. 851 */ 852 #undef __FUNC__ 853 #define __FUNC__ "MatRelax_MPIAIJ" 854 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 855 double fshift,int its,Vec xx) 856 { 857 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 858 Mat AA = mat->A, BB = mat->B; 859 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 860 Scalar *b,*x,*xs,*ls,d,*v,sum; 861 int ierr,*idx, *diag; 862 int n = mat->n, m = mat->m, i,shift = A->indexshift; 863 864 PetscFunctionBegin; 865 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 866 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 867 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 868 xs = x + shift; /* shift by one for index start of 1 */ 869 ls = ls + shift; 870 if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA);CHKERRQ(ierr);} 871 diag = A->diag; 872 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 873 if (flag & SOR_ZERO_INITIAL_GUESS) { 874 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 875 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 876 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 877 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 878 PetscFunctionReturn(0); 879 } 880 ierr = 
VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 881 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 882 while (its--) { 883 /* go down through the rows */ 884 for ( i=0; i<m; i++ ) { 885 n = A->i[i+1] - A->i[i]; 886 PLogFlops(4*n+3); 887 idx = A->j + A->i[i] + shift; 888 v = A->a + A->i[i] + shift; 889 sum = b[i]; 890 SPARSEDENSEMDOT(sum,xs,v,idx,n); 891 d = fshift + A->a[diag[i]+shift]; 892 n = B->i[i+1] - B->i[i]; 893 idx = B->j + B->i[i] + shift; 894 v = B->a + B->i[i] + shift; 895 SPARSEDENSEMDOT(sum,ls,v,idx,n); 896 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 897 } 898 /* come up through the rows */ 899 for ( i=m-1; i>-1; i-- ) { 900 n = A->i[i+1] - A->i[i]; 901 PLogFlops(4*n+3) 902 idx = A->j + A->i[i] + shift; 903 v = A->a + A->i[i] + shift; 904 sum = b[i]; 905 SPARSEDENSEMDOT(sum,xs,v,idx,n); 906 d = fshift + A->a[diag[i]+shift]; 907 n = B->i[i+1] - B->i[i]; 908 idx = B->j + B->i[i] + shift; 909 v = B->a + B->i[i] + shift; 910 SPARSEDENSEMDOT(sum,ls,v,idx,n); 911 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 912 } 913 } 914 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 915 if (flag & SOR_ZERO_INITIAL_GUESS) { 916 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 917 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 918 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 919 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 920 PetscFunctionReturn(0); 921 } 922 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 923 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 924 while (its--) { 925 for ( i=0; i<m; i++ ) { 926 n = A->i[i+1] - A->i[i]; 927 PLogFlops(4*n+3); 928 idx = A->j + A->i[i] + shift; 929 v = A->a + A->i[i] + shift; 930 sum = b[i]; 931 SPARSEDENSEMDOT(sum,xs,v,idx,n); 932 d = fshift + A->a[diag[i]+shift]; 933 n = B->i[i+1] - B->i[i]; 934 idx = B->j + B->i[i] + shift; 935 v = B->a + B->i[i] + shift; 936 SPARSEDENSEMDOT(sum,ls,v,idx,n); 937 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 938 } 939 } 940 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 941 if (flag & SOR_ZERO_INITIAL_GUESS) { 942 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 943 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 944 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 945 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 946 PetscFunctionReturn(0); 947 } 948 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 949 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 950 while (its--) { 951 for ( i=m-1; i>-1; i-- ) { 952 n = A->i[i+1] - A->i[i]; 953 PLogFlops(4*n+3); 954 idx = A->j + A->i[i] + shift; 955 v = A->a + A->i[i] + shift; 956 sum = b[i]; 957 SPARSEDENSEMDOT(sum,xs,v,idx,n); 958 d = fshift + A->a[diag[i]+shift]; 959 n = B->i[i+1] - B->i[i]; 960 idx = B->j + B->i[i] + shift; 961 v = B->a + B->i[i] + shift; 962 SPARSEDENSEMDOT(sum,ls,v,idx,n); 963 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 964 } 965 } 966 } else { 967 SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported"); 968 } 969 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 970 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 971 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 972 PetscFunctionReturn(0); 973 } 974 975 #undef __FUNC__ 976 #define __FUNC__ "MatGetInfo_MPIAIJ" 977 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 978 { 979 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 980 Mat A = mat->A, B = mat->B; 981 int ierr; 982 double isend[5], irecv[5]; 983 984 PetscFunctionBegin; 985 info->block_size = 1.0; 986 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 987 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 988 isend[3] = info->memory; isend[4] = info->mallocs; 989 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 990 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 991 isend[3] += info->memory; isend[4] += info->mallocs; 992 if (flag == MAT_LOCAL) { 993 info->nz_used = isend[0]; 994 info->nz_allocated = isend[1]; 995 info->nz_unneeded = isend[2]; 996 info->memory = isend[3]; 997 info->mallocs = isend[4]; 998 } else if (flag == MAT_GLOBAL_MAX) { 999 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1000 info->nz_used = irecv[0]; 1001 info->nz_allocated = irecv[1]; 1002 info->nz_unneeded = irecv[2]; 1003 info->memory = irecv[3]; 1004 info->mallocs = irecv[4]; 1005 } else if (flag == MAT_GLOBAL_SUM) { 1006 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1007 info->nz_used = irecv[0]; 1008 info->nz_allocated = irecv[1]; 1009 info->nz_unneeded = irecv[2]; 1010 info->memory = irecv[3]; 1011 info->mallocs = irecv[4]; 1012 } 1013 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1014 info->fill_ratio_needed = 0; 1015 info->factor_mallocs = 0; 1016 info->rows_global = 
(double)mat->M;
  info->columns_global    = (double)mat->N;
  info->rows_local        = (double)mat->m;
  /* NOTE(review): columns_local reports the GLOBAL column count N, not the
     local n returned by MatGetLocalSize_MPIAIJ.  The local storage (the
     off-diagonal block b->B) is indeed m x N, so this may be intentional --
     confirm against the MatInfo documentation. */
  info->columns_local     = (double)mat->N;

  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatSetOption_MPIAIJ"
/*
   MatSetOption_MPIAIJ - Applies an option to the parallel matrix.  Most
   options are forwarded to both sequential blocks (a->A and a->B); a few
   only update flags in the MPI wrapper, and some are ignored or unsupported.
*/
int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  /* options forwarded verbatim to both sequential blocks */
  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_COLUMNS_UNSORTED ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
      op == MAT_NEW_NONZERO_LOCATION_ERR) {
        ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
        ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
  } else if (op == MAT_ROW_ORIENTED) {
        /* orientation is recorded locally as well as forwarded */
        a->roworiented = 1; 
        ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
        ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
  } else if (op == MAT_ROWS_SORTED || 
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS)
    PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
  else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
  } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
    /* only affects the MPI stash; nothing to forward to the blocks */
    a->donotstash = 1;
  } else if (op == MAT_NO_NEW_DIAGONALS){
    SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatGetSize_MPIAIJ"
/* MatGetSize_MPIAIJ - Returns the GLOBAL dimensions M x N; either output may be NULL. */
int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->M;
  if (n) *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatGetLocalSize_MPIAIJ"
/* MatGetLocalSize_MPIAIJ - Returns the LOCAL dimensions m x n; either output may be NULL. */
int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->m;
  if (n) *n = mat->n;
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
/* MatGetOwnershipRange_MPIAIJ - Returns [rstart, rend): the half-open range
   of global row indices owned by this processor.  Outputs must be non-NULL. */
int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;

  PetscFunctionBegin;
  *m = mat->rstart; *n = mat->rend;
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatGetRow_MPIAIJ"
/*
   MatGetRow_MPIAIJ - Returns one locally owned row (global index 'row') with
   global column indices, merging the diagonal (A) and off-diagonal (B) block
   entries into column-sorted order.  Only one row may be "active" at a time;
   MatRestoreRow_MPIAIJ must be called before the next MatGetRow.
   idx and/or v may be NULL when the caller does not need them.
*/
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
  int        *cmap, *idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  /* lazily allocate a single merge buffer (values + indices in one malloc) */
  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 
    int        max = 1,m = mat->m,tmp;
    /* scan all local rows for the widest combined (A+B) row */
    for ( i=0; i<m; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* one allocation holds max Scalars followed by max ints */
    mat->rowvalues = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows")
  lrow = row - rstart;

  /* NULL-out the work pointers the caller did not ask for, so the block
     getrow routines skip the corresponding work */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  /* cmap maps B's compressed column numbers to global column numbers */
  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column precedes cstart;
         merged order is: B[0..imark), all of A, B[imark..nzB) */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already found while filling values */
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        /* A's columns are local; translate to global with cstart */
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatRestoreRow_MPIAIJ"
/* MatRestoreRow_MPIAIJ - Ends the MatGetRow access; the merge buffer is kept
   for reuse, only the "active" flag is cleared. */
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatNorm_MPIAIJ"
/*
   MatNorm_MPIAIJ - Computes the Frobenius, 1- (max column sum) or
   infinity- (max row sum) norm of the parallel matrix; the local
   contributions of the A and B blocks are combined with MPI reductions.
   The 2-norm is not supported.
*/
int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* single processor: defer entirely to the sequential block */
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      /* non-scalable: a length-N column-sum array on every processor */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp2);
      ierr = PetscMemzero(tmp,aij->N*sizeof(double));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        /* A's columns are local; offset by cstart to index the global array */
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
      }
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        /* B's columns are compressed; garray maps them to global columns */
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    } else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatTranspose_MPIAIJ"
/*
   MatTranspose_MPIAIJ - Builds the transpose by feeding each local row into
   a new n x m parallel matrix as a column via MatSetValues.  With
   matout == PETSC_NULL the result replaces A "in place" (square matrices
   only); this is implemented by destroying A's data and copying B's header
   over A while preserving A's ops tables.
*/
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{ 
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat        B;
  Scalar     *array;

  PetscFunctionBegin;
  if (matout == PETSC_NULL && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
  }

  /* note swapped local/global dimensions: result is n x m (N x M) */
  ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  /* temporarily shift A's local column indices to global numbering ... */
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    /* insert row 'row' of A as a column of B (nrow=ai[i+1]-ai[i], ncol=1) */
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  } 
  aj = Aloc->j;
  /* ... and undo the shift so A is left unmodified */
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row  = a->rstart;
  /* B's columns are compressed; expand to global numbers into a scratch array */
  ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) );CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  } 
  PetscFree(ct);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    PetscOps       *Abops;
    struct _MatOps *Aops;

    /* This isn't really an in-place transpose .... but free data structures from a */
    PetscFree(a->rowners); 
    ierr = MatDestroy(a->A);CHKERRQ(ierr);
    ierr = MatDestroy(a->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
    if (a->colmap) TableDelete(a->colmap);
#else
    if (a->colmap) PetscFree(a->colmap);
#endif
    if (a->garray) PetscFree(a->garray);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a); 

    /*
        This is horrible, horrible code. We need to keep the 
      A pointers for the bops and ops but copy everything 
      else from C.
    */
    Abops = A->bops;
    Aops  = A->ops;
    ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
    A->bops = Abops;
    A->ops  = Aops;
    /* destroy only B's header; its guts now live on in A */
    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatDiagonalScale_MPIAIJ"
/*
   MatDiagonalScale_MPIAIJ - Computes diag(ll)*mat*diag(rr).  The scatter of
   rr into the ghost vector is overlapped with the left-scaling of the
   off-diagonal block and the scaling of the diagonal block.  ll and/or rr
   may be null (0) to skip that side.
*/
int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat        a = aij->A, b = aij->B;
  int        ierr,s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale  the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  } 
  
  PetscFunctionReturn(0);
}


#undef __FUNC__  
#define __FUNC__ "MatPrintHelp_MPIAIJ"
/* MatPrintHelp_MPIAIJ - Prints the SeqAIJ options help, on rank 0 only. */
int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
  int        ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatGetBlockSize_MPIAIJ"
/* MatGetBlockSize_MPIAIJ - AIJ matrices always have block size 1. */
int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}
#undef __FUNC__  
#define __FUNC__ "MatSetUnfactored_MPIAIJ"
/* MatSetUnfactored_MPIAIJ - Clears the factored state of the diagonal block
   (the off-diagonal block is never factored). */
int MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a   = (Mat_MPIAIJ*) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatEqual_MPIAIJ"
/*
   MatEqual_MPIAIJ - Compares two MPIAIJ matrices blockwise (A with A, B with
   B) and reduces the per-processor results with a logical AND so all ranks
   return the same answer.
   NOTE(review): the reduction passes a PetscTruth through MPI_INT, which
   assumes sizeof(PetscTruth) == sizeof(int) -- confirm for this build.
*/
int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
  Mat        a, b, c, d;
  PetscTruth flg;
  int        ierr;

  PetscFunctionBegin;
  if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a, c, &flg);CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    /* only bother comparing the off-diagonal parts if the diagonal parts match */
    ierr = MatEqual(b, d, &flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatCopy_MPIAIJ"
/*
   MatCopy_MPIAIJ - Copies A into B; uses the fast blockwise copy only when
   both matrices are MPIAIJ with the same nonzero pattern, otherwise falls
   back to the generic MatCopy_Basic.
*/
int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts.
       If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatReuse,Mat **);
extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatReuse,Mat *);

/* -------------------------------------------------------------------*/
/* Function table for the MPIAIJ matrix type; slot order is fixed by
   struct _MatOps, zeros mark operations this type does not implement. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
       MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,
       MatMultTransAdd_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
       0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
       MatZeroRows_MPIAIJ,
       0,
       0,
       0,
       0,
       MatGetSize_MPIAIJ,
       MatGetLocalSize_MPIAIJ,
       MatGetOwnershipRange_MPIAIJ,
       0,
       0,
       0,
       0,
       MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       0,
       MatGetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
       MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       0,
       0,
       MatGetSubMatrix_MPIAIJ,
       0,
       0,
       MatGetMaps_Petsc};

/* ----------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNC__  
#define __FUNC__ "MatStoreValues_MPIAIJ"
/* MatStoreValues_MPIAIJ - Saves the numerical values of both sequential
   blocks (dynamically composed as "MatStoreValues_C"). */
int MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNC__  
#define __FUNC__ "MatRetrieveValues_MPIAIJ"
/* MatRetrieveValues_MPIAIJ - Restores the values previously saved by
   MatStoreValues_MPIAIJ (dynamically composed as "MatRetrieveValues_C"). */
int MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#include "pc.h"
EXTERN_C_BEGIN
extern int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
EXTERN_C_END

#undef __FUNC__  
#define __FUNC__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters 
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the 
           y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the 
       x vector for the matrix-vector product y = Ax.
 (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the 
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure. 
           The size of this array is equal to the number of local rows, i.e 'm'. 
           You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero 
           structure. The size of this array is equal to the number 
           of local rows, i.e 'm'. 

   Output Parameter:
.  A - the matrix 

   Notes:
   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one 
   processor than it must be used on all processors that share the object for 
   that argument.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to 
   process 0, the next m1 rows belong to process 1, the next m2 rows belong 
   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined 
   as the submatrix which is obtained by extraction the part corresponding 
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 
   first row that belongs to the processor, and r2 is the last row belonging 
   to the this processor. This is a square mxm matrix. The remaining portion 
   of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode  - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
.  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!
-  -mat_mpi - use the parallel matrix data structures even on one processor
        (defaults to using SeqAIJ format on one processor)


   Example usage:
  
   Consider the following 8x8 matrix with 34 non-zero values, that is 
   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0 
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local 
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store 
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat          B;
  Mat_MPIAIJ   *b;
  int          ierr, i,size,flag1 = 0, flag2 = 0;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1);CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
  /* on one processor, default to the cheaper sequential format unless the
     user explicitly requested the parallel data structures */
  if (!flag1 && !flag2 && size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data       = (void *) (b = PetscNew(Mat_MPIAIJ));CHKPTRQ(b);
  ierr          = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
  ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->ops->destroy    = MatDestroy_MPIAIJ;
  B->ops->view       = MatView_MPIAIJ;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;
  B->mapping    = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }

  ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  /* the information in the maps duplicates the information computed below, eventually 
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  /* rowners becomes the prefix sum of all local row counts */
  ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank]; 
  b->rend   = b->rowners[b->rank+1]; 
  ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank]; 
  b->cend   = b->cowners[b->rank+1]; 

  /* the diagonal block is m x n, the off-diagonal block is m x N */
  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used for matrix vector multiply */
  b->lvec      = 0;
  b->Mvctx     = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIAIJ",
                                     (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIAIJ",
                                     (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIAIJ",
                                     (void*)MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  *A = B;
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatDuplicate_MPIAIJ"
/*
   MatDuplicate_MPIAIJ - Creates a new matrix with the same layout as matin,
   duplicating the sequential blocks, the ownership tables, the column map,
   the ghost vector and the scatter context; cpvalues controls whether the
   numerical values are copied as well.
*/
int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len=0, flg;

  PetscFunctionBegin;
  *newmat       = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data       = (void *) (a = PetscNew(Mat_MPIAIJ));CHKPTRQ(a);
  ierr            = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIAIJ;
  mat->ops->view    = MatView_MPIAIJ;
  mat->factor       = matin->factor;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a->m = mat->m   = oldmat->m;
  a->n = mat->n   = oldmat->n;
  a->M = mat->M   = oldmat->M;
  a->N = mat->N   = oldmat->N;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
a->rowindices = 0; 1868 a->rowvalues = 0; 1869 a->getrowactive = PETSC_FALSE; 1870 1871 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners); 1872 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1873 a->cowners = a->rowners + a->size + 2; 1874 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1875 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1876 if (oldmat->colmap) { 1877 #if defined (PETSC_USE_CTABLE) 1878 ierr = TableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1879 #else 1880 a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1881 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1882 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));CHKERRQ(ierr); 1883 #endif 1884 } else a->colmap = 0; 1885 if (oldmat->garray) { 1886 len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 1887 a->garray = (int *) PetscMalloc((len+1)*sizeof(int));CHKPTRQ(a->garray); 1888 PLogObjectMemory(mat,len*sizeof(int)); 1889 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1890 } else a->garray = 0; 1891 1892 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1893 PLogObjectParent(mat,a->lvec); 1894 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1895 PLogObjectParent(mat,a->Mvctx); 1896 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1897 PLogObjectParent(mat,a->A); 1898 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1899 PLogObjectParent(mat,a->B); 1900 ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 1901 if (flg) { 1902 ierr = MatPrintHelp(mat);CHKERRQ(ierr); 1903 } 1904 ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1905 *newmat = mat; 1906 PetscFunctionReturn(0); 1907 } 1908 1909 #include "sys.h" 1910 1911 #undef __FUNC__ 1912 #define __FUNC__ "MatLoad_MPIAIJ" 1913 int MatLoad_MPIAIJ(Viewer viewer,MatType 
type,Mat *newmat) 1914 { 1915 Mat A; 1916 Scalar *vals,*svals; 1917 MPI_Comm comm = ((PetscObject)viewer)->comm; 1918 MPI_Status status; 1919 int i, nz, ierr, j,rstart, rend, fd; 1920 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1921 int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1922 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1923 1924 PetscFunctionBegin; 1925 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1926 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1927 if (!rank) { 1928 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1929 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1930 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 1931 if (header[3] < 0) { 1932 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ"); 1933 } 1934 } 1935 1936 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1937 M = header[1]; N = header[2]; 1938 /* determine ownership of all rows */ 1939 m = M/size + ((M % size) > rank); 1940 rowners = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners); 1941 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1942 rowners[0] = 0; 1943 for ( i=2; i<=size; i++ ) { 1944 rowners[i] += rowners[i-1]; 1945 } 1946 rstart = rowners[rank]; 1947 rend = rowners[rank+1]; 1948 1949 /* distribute row lengths to all processors */ 1950 ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens); 1951 offlens = ourlens + (rend-rstart); 1952 if (!rank) { 1953 rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths); 1954 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1955 sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts); 1956 for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1957 ierr = 
MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1958 PetscFree(sndcounts); 1959 } else { 1960 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr); 1961 } 1962 1963 if (!rank) { 1964 /* calculate the number of nonzeros on each processor */ 1965 procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz); 1966 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 1967 for ( i=0; i<size; i++ ) { 1968 for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1969 procsnz[i] += rowlengths[j]; 1970 } 1971 } 1972 PetscFree(rowlengths); 1973 1974 /* determine max buffer needed and allocate it */ 1975 maxnz = 0; 1976 for ( i=0; i<size; i++ ) { 1977 maxnz = PetscMax(maxnz,procsnz[i]); 1978 } 1979 cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols); 1980 1981 /* read in my part of the matrix column indices */ 1982 nz = procsnz[0]; 1983 mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols); 1984 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1985 1986 /* read in every one elses and ship off */ 1987 for ( i=1; i<size; i++ ) { 1988 nz = procsnz[i]; 1989 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 1990 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 1991 } 1992 PetscFree(cols); 1993 } else { 1994 /* determine buffer space needed for message */ 1995 nz = 0; 1996 for ( i=0; i<m; i++ ) { 1997 nz += ourlens[i]; 1998 } 1999 mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols); 2000 2001 /* receive message of column indices*/ 2002 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2003 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2004 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2005 } 2006 2007 /* determine column ownership if matrix is not square */ 2008 if (N != M) { 2009 n = N/size + ((N % size) > rank); 2010 ierr = 
MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2011 cstart = cend - n; 2012 } else { 2013 cstart = rstart; 2014 cend = rend; 2015 n = cend - cstart; 2016 } 2017 2018 /* loop over local rows, determining number of off diagonal entries */ 2019 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 2020 jj = 0; 2021 for ( i=0; i<m; i++ ) { 2022 for ( j=0; j<ourlens[i]; j++ ) { 2023 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2024 jj++; 2025 } 2026 } 2027 2028 /* create our matrix */ 2029 for ( i=0; i<m; i++ ) { 2030 ourlens[i] -= offlens[i]; 2031 } 2032 ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 2033 A = *newmat; 2034 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2035 for ( i=0; i<m; i++ ) { 2036 ourlens[i] += offlens[i]; 2037 } 2038 2039 if (!rank) { 2040 vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals); 2041 2042 /* read in my part of the matrix numerical values */ 2043 nz = procsnz[0]; 2044 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2045 2046 /* insert into matrix */ 2047 jj = rstart; 2048 smycols = mycols; 2049 svals = vals; 2050 for ( i=0; i<m; i++ ) { 2051 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2052 smycols += ourlens[i]; 2053 svals += ourlens[i]; 2054 jj++; 2055 } 2056 2057 /* read in other processors and ship out */ 2058 for ( i=1; i<size; i++ ) { 2059 nz = procsnz[i]; 2060 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2061 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2062 } 2063 PetscFree(procsnz); 2064 } else { 2065 /* receive numeric values */ 2066 vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals); 2067 2068 /* receive message of values*/ 2069 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2070 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2071 if (maxnz != nz) 
SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2072 2073 /* insert into matrix */ 2074 jj = rstart; 2075 smycols = mycols; 2076 svals = vals; 2077 for ( i=0; i<m; i++ ) { 2078 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2079 smycols += ourlens[i]; 2080 svals += ourlens[i]; 2081 jj++; 2082 } 2083 } 2084 PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 2085 2086 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2087 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2088 PetscFunctionReturn(0); 2089 } 2090 2091 #undef __FUNC__ 2092 #define __FUNC__ "MatGetSubMatrix_MPIAIJ" 2093 /* 2094 Not great since it makes two copies of the submatrix, first an SeqAIJ 2095 in local and then by concatenating the local matrices the end result. 2096 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2097 */ 2098 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 2099 { 2100 int ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j; 2101 Mat *local,M, Mreuse; 2102 Scalar *vwork,*aa; 2103 MPI_Comm comm = mat->comm; 2104 Mat_SeqAIJ *aij; 2105 int *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend; 2106 2107 PetscFunctionBegin; 2108 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2109 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2110 2111 if (call == MAT_REUSE_MATRIX) { 2112 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2113 if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse"); 2114 local = &Mreuse; 2115 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2116 } else { 2117 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2118 Mreuse = *local; 2119 PetscFree(local); 2120 } 2121 2122 /* 2123 m - number of local rows 2124 n - number of columns (same on all processors) 2125 rstart - first 
row in new global matrix generated 2126 */ 2127 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2128 if (call == MAT_INITIAL_MATRIX) { 2129 aij = (Mat_SeqAIJ *) (Mreuse)->data; 2130 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 2131 ii = aij->i; 2132 jj = aij->j; 2133 2134 /* 2135 Determine the number of non-zeros in the diagonal and off-diagonal 2136 portions of the matrix in order to do correct preallocation 2137 */ 2138 2139 /* first get start and end of "diagonal" columns */ 2140 if (csize == PETSC_DECIDE) { 2141 nlocal = n/size + ((n % size) > rank); 2142 } else { 2143 nlocal = csize; 2144 } 2145 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2146 rstart = rend - nlocal; 2147 if (rank == size - 1 && rend != n) { 2148 SETERRQ(1,1,"Local column sizes do not add up to total number of columns"); 2149 } 2150 2151 /* next, compute all the lengths */ 2152 dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens); 2153 olens = dlens + m; 2154 for ( i=0; i<m; i++ ) { 2155 jend = ii[i+1] - ii[i]; 2156 olen = 0; 2157 dlen = 0; 2158 for ( j=0; j<jend; j++ ) { 2159 if ( *jj < rstart || *jj >= rend) olen++; 2160 else dlen++; 2161 jj++; 2162 } 2163 olens[i] = olen; 2164 dlens[i] = dlen; 2165 } 2166 ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 2167 PetscFree(dlens); 2168 } else { 2169 int ml,nl; 2170 2171 M = *newmat; 2172 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2173 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request"); 2174 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2175 /* 2176 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2177 rather than the slower MatSetValues(). 
2178 */ 2179 M->was_assembled = PETSC_TRUE; 2180 M->assembled = PETSC_FALSE; 2181 } 2182 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2183 aij = (Mat_SeqAIJ *) (Mreuse)->data; 2184 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 2185 ii = aij->i; 2186 jj = aij->j; 2187 aa = aij->a; 2188 for (i=0; i<m; i++) { 2189 row = rstart + i; 2190 nz = ii[i+1] - ii[i]; 2191 cwork = jj; jj += nz; 2192 vwork = aa; aa += nz; 2193 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2194 } 2195 2196 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2197 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2198 *newmat = M; 2199 2200 /* save submatrix used in processor for next request */ 2201 if (call == MAT_INITIAL_MATRIX) { 2202 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2203 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2204 } 2205 2206 PetscFunctionReturn(0); 2207 } 2208