1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: mpiaij.c,v 1.297 1999/06/30 23:51:10 balay Exp bsmith $"; 3 #endif 4 5 #include "src/mat/impls/aij/mpi/mpiaij.h" 6 #include "src/vec/vecimpl.h" 7 #include "src/inline/spops.h" 8 9 extern int MatSetUpMultiply_MPIAIJ(Mat); 10 extern int DisAssemble_MPIAIJ(Mat); 11 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 12 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 13 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 14 extern int MatPrintHelp_SeqAIJ(Mat); 15 16 /* local utility routine that creates a mapping from the global column 17 number to the local number in the off-diagonal part of the local 18 storage of the matrix. This is done in a non scable way since the 19 length of colmap equals the global matrix length. 20 */ 21 #undef __FUNC__ 22 #define __FUNC__ "CreateColmap_MPIAIJ_Private" 23 int CreateColmap_MPIAIJ_Private(Mat mat) 24 { 25 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 26 Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 27 int n = B->n,i,ierr; 28 29 PetscFunctionBegin; 30 #if defined (PETSC_USE_CTABLE) 31 ierr = TableCreate(aij->n/5,&aij->colmap);CHKERRQ(ierr); 32 for ( i=0; i<n; i++ ){ 33 ierr = TableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr); 34 } 35 #else 36 aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap); 37 PLogObjectMemory(mat,aij->N*sizeof(int)); 38 ierr = PetscMemzero(aij->colmap,aij->N*sizeof(int));CHKERRQ(ierr); 39 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 40 #endif 41 PetscFunctionReturn(0); 42 } 43 44 #define CHUNKSIZE 15 45 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 46 { \ 47 \ 48 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 49 rmax = aimax[row]; nrow = ailen[row]; \ 50 col1 = col - shift; \ 51 \ 52 low = 0; high = nrow; \ 53 while (high-low > 5) { \ 54 t = (low+high)/2; \ 55 if (rp[t] > col) high = t; \ 56 else low = t; \ 57 } \ 58 for ( _i=0; _i<nrow; _i++ ) { \ 59 if (rp[_i] > col1) break; \ 60 if (rp[_i] == col1) { \ 61 if (addv == ADD_VALUES) ap[_i] += value; \ 62 else ap[_i] = value; \ 63 goto a_noinsert; \ 64 } \ 65 } \ 66 if (nonew == 1) goto a_noinsert; \ 67 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 68 if (nrow >= rmax) { \ 69 /* there is no extra room in row, therefore enlarge */ \ 70 int new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \ 71 Scalar *new_a; \ 72 \ 73 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 74 \ 75 /* malloc new storage space */ \ 76 len = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \ 77 new_a = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \ 78 new_j = (int *) (new_a + new_nz); \ 79 new_i = new_j + new_nz; \ 80 \ 81 /* copy over old data into new slots */ \ 82 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \ 83 for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 84 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 85 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 86 ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 87 len*sizeof(int));CHKERRQ(ierr); \ 88 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \ 89 ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 90 len*sizeof(Scalar));CHKERRQ(ierr); \ 91 /* free up old matrix storage */ \ 92 \ 93 ierr = PetscFree(a->a);CHKERRQ(ierr); \ 94 if (!a->singlemalloc) { \ 95 ierr = PetscFree(a->i);CHKERRQ(ierr); \ 96 ierr = PetscFree(a->j);CHKERRQ(ierr); \ 97 } \ 98 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 99 a->singlemalloc = 1; \ 100 \ 101 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 102 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 103 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 104 a->maxnz += CHUNKSIZE; \ 105 a->reallocs++; \ 106 } \ 107 N = nrow++ - 1; a->nz++; \ 108 /* shift up all the later entries in this row */ \ 109 for ( ii=N; ii>=_i; ii-- ) { \ 110 rp[ii+1] = rp[ii]; \ 111 ap[ii+1] = ap[ii]; \ 112 } \ 113 rp[_i] = col1; \ 114 ap[_i] = value; \ 115 a_noinsert: ; \ 116 ailen[row] = nrow; \ 117 } 118 119 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 120 { \ 121 \ 122 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 123 rmax = bimax[row]; nrow = bilen[row]; \ 124 col1 = col - shift; \ 125 \ 126 low = 0; high = nrow; \ 127 while (high-low > 5) { \ 128 t = (low+high)/2; \ 129 if (rp[t] > col) high = t; \ 130 else low = t; \ 131 } \ 132 for ( _i=0; _i<nrow; _i++ ) { \ 133 if (rp[_i] > col1) break; \ 134 if (rp[_i] == col1) { \ 135 if (addv == ADD_VALUES) ap[_i] += value; \ 136 else ap[_i] = value; \ 137 goto b_noinsert; \ 138 } \ 139 } \ 140 if (nonew == 1) goto b_noinsert; \ 141 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 142 if (nrow >= rmax) { \ 143 /* there is no extra room in row, therefore enlarge */ \ 144 int new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \ 145 Scalar *new_a; \ 146 \ 147 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 148 \ 149 /* malloc new storage space */ \ 150 len = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \ 151 new_a = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \ 152 new_j = (int *) (new_a + new_nz); \ 153 new_i = new_j + new_nz; \ 154 \ 155 /* copy over old data into new slots */ \ 156 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \ 157 for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 158 ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \ 159 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 160 ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 161 len*sizeof(int));CHKERRQ(ierr); \ 162 ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \ 163 ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 164 len*sizeof(Scalar));CHKERRQ(ierr); \ 165 /* free up old matrix storage */ \ 166 \ 167 ierr = PetscFree(b->a);CHKERRQ(ierr); \ 168 if (!b->singlemalloc) { \ 169 ierr = PetscFree(b->i);CHKERRQ(ierr); \ 170 ierr = PetscFree(b->j);CHKERRQ(ierr); \ 171 } \ 172 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 173 b->singlemalloc = 1; \ 174 \ 175 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 176 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 177 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 178 b->maxnz += CHUNKSIZE; \ 179 b->reallocs++; \ 180 } \ 181 N = nrow++ - 1; b->nz++; \ 182 /* shift up all the later entries in this row */ \ 183 for ( ii=N; ii>=_i; ii-- ) { \ 184 rp[ii+1] = rp[ii]; \ 185 ap[ii+1] = ap[ii]; \ 186 } \ 187 rp[_i] = col1; \ 188 ap[_i] = value; \ 189 b_noinsert: ; \ 190 bilen[row] = nrow; \ 191 } 192 193 #undef __FUNC__ 194 #define __FUNC__ "MatSetValues_MPIAIJ" 195 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 196 { 197 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 198 Scalar value; 199 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 200 int cstart = aij->cstart, cend = aij->cend,row,col; 201 int roworiented = aij->roworiented; 202 203 /* Some Variables required in the macro */ 204 Mat A = aij->A; 205 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 206 int *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j; 207 Scalar *aa = a->a; 208 209 Mat B = aij->B; 210 Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data; 211 int *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j; 212 Scalar *ba = b->a; 213 214 int *rp,ii,nrow,_i,rmax, N, col1,low,high,t; 215 int nonew = a->nonew,shift = a->indexshift; 216 Scalar *ap; 217 218 PetscFunctionBegin; 219 for ( i=0; i<m; i++ ) { 220 if (im[i] < 0) continue; 221 #if defined(PETSC_USE_BOPT_g) 222 if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 223 #endif 224 if (im[i] >= rstart && im[i] < rend) { 225 row = im[i] - rstart; 226 for ( j=0; j<n; j++ ) { 227 if (in[j] >= cstart && in[j] < cend){ 228 col = in[j] - cstart; 229 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 230 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 231 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 232 } 233 else if (in[j] < 0) continue; 234 #if defined(PETSC_USE_BOPT_g) 235 else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");} 236 #endif 237 else { 238 if (mat->was_assembled) { 239 if (!aij->colmap) { 240 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 241 } 242 #if defined (PETSC_USE_CTABLE) 243 ierr = TableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 244 col--; 245 #else 246 col = aij->colmap[in[j]] - 1; 247 #endif 248 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 249 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 250 col = in[j]; 251 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 252 B = aij->B; 253 b = (Mat_SeqAIJ *) B->data; 254 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 255 ba = b->a; 256 } 257 } else col = in[j]; 258 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 259 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 260 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 261 } 262 } 263 } else { 264 if (!aij->donotstash) { 265 if (roworiented) { 266 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 267 } else { 268 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 269 } 270 } 271 } 272 } 273 PetscFunctionReturn(0); 274 } 275 276 #undef __FUNC__ 277 #define __FUNC__ "MatGetValues_MPIAIJ" 278 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 279 { 280 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 281 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 282 int cstart = aij->cstart, cend = aij->cend,row,col; 283 284 PetscFunctionBegin; 285 for ( i=0; i<m; i++ ) { 286 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 287 if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 288 if (idxm[i] >= rstart && idxm[i] < rend) { 289 row = idxm[i] - rstart; 290 for ( j=0; j<n; j++ ) { 291 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 292 if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 293 if (idxn[j] >= cstart && idxn[j] < cend){ 294 col = idxn[j] - cstart; 295 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 296 } else { 297 if (!aij->colmap) { 298 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 299 } 300 #if defined (PETSC_USE_CTABLE) 301 ierr = TableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 302 col --; 303 #else 304 col = aij->colmap[idxn[j]] - 1; 305 #endif 306 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 307 else { 308 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 309 } 310 } 311 } 312 } else { 313 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 314 } 315 } 316 PetscFunctionReturn(0); 317 } 318 319 #undef __FUNC__ 320 #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 321 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 322 { 323 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 324 int ierr,nstash,reallocs; 325 InsertMode addv; 326 327 PetscFunctionBegin; 328 if (aij->donotstash) { 329 PetscFunctionReturn(0); 330 } 331 332 /* make sure all processors are either in INSERTMODE or ADDMODE */ 333 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 334 if (addv == (ADD_VALUES|INSERT_VALUES)) { 335 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 336 } 337 mat->insertmode = addv; /* in case this processor had no cache */ 338 339 ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr); 340 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 341 PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 342 PetscFunctionReturn(0); 343 } 344 345 346 #undef __FUNC__ 347 #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 348 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 349 { 350 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 351 int i,j,rstart,ncols,n,ierr,flg; 352 int *row,*col,other_disassembled; 353 Scalar *val; 354 InsertMode addv = mat->insertmode; 355 356 PetscFunctionBegin; 357 if (!aij->donotstash) { 358 while (1) { 359 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 360 if (!flg) break; 361 362 for ( i=0; i<n; ) { 363 /* Now identify the consecutive vals belonging to the same row */ 364 for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; } 365 if (j < n) ncols = j-i; 366 else ncols = n-i; 367 /* Now assemble all these values with a single function call */ 368 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 369 i = j; 370 } 371 } 372 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 373 } 374 375 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 376 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 377 378 /* determine if any processor has disassembled, if so we must 379 also disassemble ourselfs, in order that we may reassemble. */ 380 /* 381 if nonzero structure of submatrix B cannot change then we know that 382 no processor disassembled thus we can skip this stuff 383 */ 384 if (!((Mat_SeqAIJ*) aij->B->data)->nonew) { 385 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 386 if (mat->was_assembled && !other_disassembled) { 387 ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 388 } 389 } 390 391 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 392 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 393 } 394 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 395 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 396 397 if (aij->rowvalues) { 398 ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr); 399 aij->rowvalues = 0; 400 } 401 PetscFunctionReturn(0); 402 } 403 404 #undef __FUNC__ 405 #define __FUNC__ "MatZeroEntries_MPIAIJ" 406 int MatZeroEntries_MPIAIJ(Mat A) 407 { 408 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 409 int ierr; 410 411 PetscFunctionBegin; 412 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 413 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 414 PetscFunctionReturn(0); 415 } 416 417 #undef __FUNC__ 418 #define __FUNC__ "MatZeroRows_MPIAIJ" 419 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 420 { 421 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 422 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 423 int *procs,*nprocs,j,found,idx,nsends,*work,row; 424 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 425 int *rvalues,tag = A->tag,count,base,slen,n,*source; 426 int *lens,imdex,*lrows,*values,rstart=l->rstart; 427 MPI_Comm comm = A->comm; 428 MPI_Request *send_waits,*recv_waits; 429 MPI_Status recv_status,*send_status; 430 IS istmp; 431 432 PetscFunctionBegin; 433 ierr = ISGetSize(is,&N);CHKERRQ(ierr); 434 ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 435 436 /* first count number of contributors to each processor */ 437 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs); 438 ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 439 procs = nprocs + size; 440 owner = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/ 441 for ( i=0; i<N; i++ ) { 442 idx = rows[i]; 443 found = 0; 444 for ( j=0; j<size; j++ ) { 445 if (idx >= owners[j] && idx < owners[j+1]) { 446 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 447 } 448 } 449 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 450 } 451 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 452 453 /* inform other processors of number of messages and max length*/ 454 work = (int *) PetscMalloc( size*sizeof(int) );CHKPTRQ(work); 455 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 456 nrecvs = work[rank]; 457 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 458 nmax = work[rank]; 459 ierr = PetscFree(work);CHKERRQ(ierr); 460 461 /* post receives: */ 462 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 463 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 464 for ( i=0; i<nrecvs; i++ ) { 465 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 466 } 467 468 /* do sends: 469 1) starts[i] gives the starting index in svalues for stuff going to 470 the ith processor 471 */ 472 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues); 473 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 474 starts = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts); 475 starts[0] = 0; 476 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 477 for ( i=0; i<N; i++ ) { 478 svalues[starts[owner[i]]++] = rows[i]; 479 } 480 ISRestoreIndices(is,&rows); 481 482 starts[0] = 0; 483 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 484 count = 0; 485 for ( i=0; i<size; i++ ) { 486 if (procs[i]) { 487 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 488 } 489 } 490 ierr = PetscFree(starts);CHKERRQ(ierr); 491 492 base = owners[rank]; 493 494 /* wait on receives */ 495 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens); 496 source = lens + nrecvs; 497 count = nrecvs; slen = 0; 498 while (count) { 499 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 500 /* unpack receives into our local space */ 501 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 502 source[imdex] = recv_status.MPI_SOURCE; 503 lens[imdex] = n; 504 slen += n; 505 count--; 506 } 507 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 508 509 /* move the data into the send scatter */ 510 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows); 511 count = 0; 512 for ( i=0; i<nrecvs; i++ ) { 513 values = rvalues + i*nmax; 514 for ( j=0; j<lens[i]; j++ ) { 515 lrows[count++] = values[j] - base; 516 } 517 } 518 ierr = PetscFree(rvalues);CHKERRQ(ierr); 519 ierr = PetscFree(lens);CHKERRQ(ierr); 520 ierr = PetscFree(owner);CHKERRQ(ierr); 521 ierr = PetscFree(nprocs);CHKERRQ(ierr); 522 523 /* actually zap the local rows */ 524 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 525 PLogObjectParent(A,istmp); 526 527 /* 528 Zero the required rows. If the "diagonal block" of the matrix 529 is square and the user wishes to set the diagonal we use seperate 530 code so that MatSetValues() is not called for each diagonal allocating 531 new memory, thus calling lots of mallocs and slowing things down. 532 533 Contributed by: Mathew Knepley 534 */ 535 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 536 ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr); 537 if (diag && (l->A->M == l->A->N)) { 538 ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr); 539 } else if (diag) { 540 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 541 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 542 SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 543 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 544 } 545 for ( i = 0; i < slen; i++ ) { 546 row = lrows[i] + rstart; 547 ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 548 } 549 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 550 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 551 } else { 552 ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr); 553 } 554 ierr = ISDestroy(istmp);CHKERRQ(ierr); 555 ierr = PetscFree(lrows);CHKERRQ(ierr); 556 557 /* wait on sends */ 558 if (nsends) { 559 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 560 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 561 ierr = PetscFree(send_status);CHKERRQ(ierr); 562 } 563 ierr = PetscFree(send_waits);CHKERRQ(ierr); 564 ierr = PetscFree(svalues);CHKERRQ(ierr); 565 566 PetscFunctionReturn(0); 567 } 568 569 #undef __FUNC__ 570 #define __FUNC__ "MatMult_MPIAIJ" 571 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 572 { 573 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 574 int ierr,nt; 575 576 PetscFunctionBegin; 577 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 578 if (nt != a->n) { 579 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 580 } 581 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 582 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 583 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 584 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 585 PetscFunctionReturn(0); 586 } 587 588 #undef __FUNC__ 589 #define __FUNC__ "MatMultAdd_MPIAIJ" 590 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 591 { 592 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 593 int ierr; 594 595 PetscFunctionBegin; 596 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 597 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 598 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 599 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 600 PetscFunctionReturn(0); 601 } 602 603 #undef __FUNC__ 604 #define __FUNC__ "MatMultTrans_MPIAIJ" 605 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 606 { 607 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 608 int ierr; 609 610 PetscFunctionBegin; 611 /* do nondiagonal part */ 612 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr); 613 /* send it on its way */ 614 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 615 /* do local part */ 616 ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr); 617 /* receive remote parts: note this assumes the values are not actually */ 618 /* inserted in yy until the next line, which is true for my implementation*/ 619 /* but is not perhaps always true. */ 620 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 621 PetscFunctionReturn(0); 622 } 623 624 #undef __FUNC__ 625 #define __FUNC__ "MatMultTransAdd_MPIAIJ" 626 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 627 { 628 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 629 int ierr; 630 631 PetscFunctionBegin; 632 /* do nondiagonal part */ 633 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr); 634 /* send it on its way */ 635 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 636 /* do local part */ 637 ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 638 /* receive remote parts: note this assumes the values are not actually */ 639 /* inserted in yy until the next line, which is true for my implementation*/ 640 /* but is not perhaps always true. */ 641 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 642 PetscFunctionReturn(0); 643 } 644 645 /* 646 This only works correctly for square matrices where the subblock A->A is the 647 diagonal block 648 */ 649 #undef __FUNC__ 650 #define __FUNC__ "MatGetDiagonal_MPIAIJ" 651 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 652 { 653 int ierr; 654 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 655 656 PetscFunctionBegin; 657 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 658 if (a->rstart != a->cstart || a->rend != a->cend) { 659 SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition"); 660 } 661 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 662 PetscFunctionReturn(0); 663 } 664 665 #undef __FUNC__ 666 #define __FUNC__ "MatScale_MPIAIJ" 667 int MatScale_MPIAIJ(Scalar *aa,Mat A) 668 { 669 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 670 int ierr; 671 672 PetscFunctionBegin; 673 ierr = MatScale(aa,a->A);CHKERRQ(ierr); 674 ierr = MatScale(aa,a->B);CHKERRQ(ierr); 675 PetscFunctionReturn(0); 676 } 677 678 #undef __FUNC__ 679 #define __FUNC__ "MatDestroy_MPIAIJ" 680 int MatDestroy_MPIAIJ(Mat mat) 681 { 682 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 683 int ierr; 684 685 PetscFunctionBegin; 686 687 if (mat->mapping) { 688 ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr); 689 } 690 if (mat->bmapping) { 691 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr); 692 } 693 if (mat->rmap) { 694 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 695 } 696 if (mat->cmap) { 697 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 698 } 699 #if defined(PETSC_USE_LOG) 700 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N); 701 #endif 702 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 703 ierr = PetscFree(aij->rowners);CHKERRQ(ierr); 704 ierr = MatDestroy(aij->A);CHKERRQ(ierr); 705 ierr = MatDestroy(aij->B);CHKERRQ(ierr); 706 #if defined (PETSC_USE_CTABLE) 707 if (aij->colmap) TableDelete(aij->colmap); 708 #else 709 if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);} 710 #endif 711 if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);} 712 if (aij->lvec) VecDestroy(aij->lvec); 713 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 714 if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);} 715 ierr = PetscFree(aij);CHKERRQ(ierr); 716 PLogObjectDestroy(mat); 717 PetscHeaderDestroy(mat); 718 PetscFunctionReturn(0); 719 } 720 721 #undef __FUNC__ 722 #define __FUNC__ "MatView_MPIAIJ_Binary" 723 int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 724 { 725 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 726 int ierr; 727 728 PetscFunctionBegin; 729 if (aij->size == 1) { 730 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 731 } 732 else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 733 PetscFunctionReturn(0); 734 } 735 736 #undef __FUNC__ 737 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworSocket" 738 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer) 739 { 740 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 741 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 742 int ierr, format,shift = C->indexshift,rank = aij->rank ; 743 int size = aij->size; 744 FILE *fd; 745 ViewerType vtype; 746 747 PetscFunctionBegin; 748 ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr); 749 if (PetscTypeCompare(vtype,ASCII_VIEWER)) { 750 ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 751 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 752 MatInfo info; 753 int flg; 754 ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 755 ierr = ViewerASCIIGetPointer(viewer,&fd);CHKERRQ(ierr); 756 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 757 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr); 758 PetscSequentialPhaseBegin(mat->comm,1); 759 if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 760 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 761 else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 762 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 763 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 764 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 765 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 766 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 767 fflush(fd); 768 PetscSequentialPhaseEnd(mat->comm,1); 769 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 770 PetscFunctionReturn(0); 771 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 772 PetscFunctionReturn(0); 773 } 774 } else if (PetscTypeCompare(vtype,DRAW_VIEWER)) { 775 Draw draw; 776 PetscTruth isnull; 777 ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 778 ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 779 } 780 781 if (size == 1) { 782 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 783 } else { 784 /* assemble the entire matrix onto first processor. */ 785 Mat A; 786 Mat_SeqAIJ *Aloc; 787 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 788 Scalar *a; 789 790 if (!rank) { 791 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 792 } else { 793 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 794 } 795 PLogObjectParent(mat,A); 796 797 /* copy over the A part */ 798 Aloc = (Mat_SeqAIJ*) aij->A->data; 799 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 800 row = aij->rstart; 801 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 802 for ( i=0; i<m; i++ ) { 803 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 804 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 805 } 806 aj = Aloc->j; 807 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 808 809 /* copy over the B part */ 810 Aloc = (Mat_SeqAIJ*) aij->B->data; 811 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 812 row = aij->rstart; 813 ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) );CHKPTRQ(cols); 814 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 815 for ( i=0; i<m; i++ ) { 816 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 817 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 818 } 819 ierr = PetscFree(ct);CHKERRQ(ierr); 820 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 821 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 822 /* 823 Everyone has to call to draw the matrix since the graphics waits are 824 synchronized across all processors that share the Draw object 825 */ 826 if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) { 827 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer);CHKERRQ(ierr); 828 } 829 ierr = MatDestroy(A);CHKERRQ(ierr); 830 } 831 PetscFunctionReturn(0); 832 } 833 834 #undef __FUNC__ 835 #define __FUNC__ "MatView_MPIAIJ" 836 int MatView_MPIAIJ(Mat mat,Viewer viewer) 837 { 838 int ierr; 839 ViewerType vtype; 840 841 PetscFunctionBegin; 842 ierr = ViewerGetType(viewer,&vtype);CHKERRQ(ierr); 843 if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) || 844 PetscTypeCompare(vtype,SOCKET_VIEWER) || PetscTypeCompare(vtype,BINARY_VIEWER)) { 845 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 846 /* 847 } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) { 848 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 849 */ 850 } else { 851 SETERRQ(1,1,"Viewer type not supported by PETSc object"); 852 } 853 PetscFunctionReturn(0); 854 } 855 856 /* 857 This has to provide several versions. 858 859 2) a) use only local smoothing updating outer values only once. 860 b) local smoothing updating outer values each inner iteration 861 3) color updating out values betwen colors. 862 */ 863 #undef __FUNC__ 864 #define __FUNC__ "MatRelax_MPIAIJ" 865 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 866 double fshift,int its,Vec xx) 867 { 868 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 869 Mat AA = mat->A, BB = mat->B; 870 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 871 Scalar *b,*x,*xs,*ls,d,*v,sum; 872 int ierr,*idx, *diag; 873 int n = mat->n, m = mat->m, i,shift = A->indexshift; 874 875 PetscFunctionBegin; 876 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 877 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 878 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 879 xs = x + shift; /* shift by one for index start of 1 */ 880 ls = ls + shift; 881 if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA);CHKERRQ(ierr);} 882 diag = A->diag; 883 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 884 if (flag & SOR_ZERO_INITIAL_GUESS) { 885 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 886 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 887 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 888 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 889 PetscFunctionReturn(0); 890 } 891 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 892 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 893 while (its--) { 894 /* go down through the rows */ 895 for ( i=0; i<m; i++ ) { 896 n = A->i[i+1] - A->i[i]; 897 PLogFlops(4*n+3); 898 idx = A->j + A->i[i] + shift; 899 v = A->a + A->i[i] + shift; 900 sum = b[i]; 901 SPARSEDENSEMDOT(sum,xs,v,idx,n); 902 d = fshift + A->a[diag[i]+shift]; 903 n = B->i[i+1] - B->i[i]; 904 idx = B->j + B->i[i] + shift; 905 v = B->a + B->i[i] + shift; 906 SPARSEDENSEMDOT(sum,ls,v,idx,n); 907 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 908 } 909 /* come up through the rows */ 910 for ( i=m-1; i>-1; i-- ) { 911 n = A->i[i+1] - A->i[i]; 912 PLogFlops(4*n+3) 913 idx = A->j + A->i[i] + shift; 914 v = A->a + A->i[i] + shift; 915 sum = b[i]; 916 SPARSEDENSEMDOT(sum,xs,v,idx,n); 917 d = fshift + A->a[diag[i]+shift]; 918 n = B->i[i+1] - B->i[i]; 919 idx = B->j + B->i[i] + shift; 920 v = B->a + B->i[i] + shift; 921 SPARSEDENSEMDOT(sum,ls,v,idx,n); 922 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 923 } 924 } 925 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 926 if (flag & SOR_ZERO_INITIAL_GUESS) { 927 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 928 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 929 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 930 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 931 PetscFunctionReturn(0); 932 } 933 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 934 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 935 while (its--) { 936 for ( i=0; i<m; i++ ) { 937 n = A->i[i+1] - A->i[i]; 938 PLogFlops(4*n+3); 939 idx = A->j + A->i[i] + shift; 940 v = A->a + A->i[i] + shift; 941 sum = b[i]; 942 SPARSEDENSEMDOT(sum,xs,v,idx,n); 943 d = fshift + A->a[diag[i]+shift]; 944 n = B->i[i+1] - B->i[i]; 945 idx = B->j + B->i[i] + shift; 946 v = B->a + B->i[i] + shift; 947 SPARSEDENSEMDOT(sum,ls,v,idx,n); 948 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 949 } 950 } 951 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 952 if (flag & SOR_ZERO_INITIAL_GUESS) { 953 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 954 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 955 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 956 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 957 PetscFunctionReturn(0); 958 } 959 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 960 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 961 while (its--) { 962 for ( i=m-1; i>-1; i-- ) { 963 n = A->i[i+1] - A->i[i]; 964 PLogFlops(4*n+3); 965 idx = A->j + A->i[i] + shift; 966 v = A->a + A->i[i] + shift; 967 sum = b[i]; 968 SPARSEDENSEMDOT(sum,xs,v,idx,n); 969 d = fshift + A->a[diag[i]+shift]; 970 n = B->i[i+1] - B->i[i]; 971 idx = B->j + B->i[i] + shift; 972 v = B->a + B->i[i] + shift; 973 SPARSEDENSEMDOT(sum,ls,v,idx,n); 974 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 975 } 976 } 977 } else { 978 SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported"); 979 } 980 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 981 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 982 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 983 PetscFunctionReturn(0); 984 } 985 986 #undef __FUNC__ 987 #define __FUNC__ "MatGetInfo_MPIAIJ" 988 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 989 { 990 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 991 Mat A = mat->A, B = mat->B; 992 int ierr; 993 double isend[5], irecv[5]; 994 995 PetscFunctionBegin; 996 info->block_size = 1.0; 997 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 998 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 999 isend[3] = info->memory; isend[4] = info->mallocs; 1000 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1001 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1002 isend[3] += info->memory; isend[4] += info->mallocs; 1003 if (flag == MAT_LOCAL) { 1004 info->nz_used = isend[0]; 1005 info->nz_allocated = isend[1]; 1006 info->nz_unneeded = isend[2]; 1007 info->memory = isend[3]; 1008 info->mallocs = isend[4]; 1009 } else if (flag == MAT_GLOBAL_MAX) { 1010 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1011 info->nz_used = irecv[0]; 1012 info->nz_allocated = irecv[1]; 1013 info->nz_unneeded = irecv[2]; 1014 info->memory = irecv[3]; 1015 info->mallocs = irecv[4]; 1016 } else if (flag == MAT_GLOBAL_SUM) { 1017 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1018 info->nz_used = irecv[0]; 1019 info->nz_allocated = irecv[1]; 1020 info->nz_unneeded = irecv[2]; 1021 info->memory = irecv[3]; 1022 info->mallocs = irecv[4]; 1023 } 1024 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1025 info->fill_ratio_needed = 0; 1026 info->factor_mallocs = 0; 1027 info->rows_global = (double)mat->M; 1028 info->columns_global = (double)mat->N; 1029 info->rows_local = (double)mat->m; 1030 info->columns_local = (double)mat->N; 1031 1032 PetscFunctionReturn(0); 1033 } 1034 1035 #undef __FUNC__ 1036 #define __FUNC__ "MatSetOption_MPIAIJ" 1037 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1038 { 1039 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1040 int ierr; 1041 1042 PetscFunctionBegin; 1043 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1044 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1045 op == MAT_COLUMNS_UNSORTED || 1046 op == MAT_COLUMNS_SORTED || 1047 op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1048 op == MAT_NEW_NONZERO_LOCATION_ERR) { 1049 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1050 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1051 } else if (op == MAT_ROW_ORIENTED) { 1052 a->roworiented = 1; 1053 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1054 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1055 } else if (op == MAT_ROWS_SORTED || 1056 op == MAT_ROWS_UNSORTED || 1057 op == MAT_SYMMETRIC || 1058 op == MAT_STRUCTURALLY_SYMMETRIC || 1059 op == MAT_YES_NEW_DIAGONALS) 1060 PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1061 else if (op == MAT_COLUMN_ORIENTED) { 1062 a->roworiented = 0; 1063 ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1064 ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1065 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1066 a->donotstash = 1; 1067 } else if (op == MAT_NO_NEW_DIAGONALS){ 1068 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1069 } else { 1070 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1071 } 1072 PetscFunctionReturn(0); 1073 } 1074 1075 #undef __FUNC__ 1076 #define __FUNC__ "MatGetSize_MPIAIJ" 1077 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1078 { 1079 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1080 1081 PetscFunctionBegin; 1082 if (m) *m = mat->M; 1083 if (n) *n = mat->N; 1084 PetscFunctionReturn(0); 1085 } 1086 1087 #undef __FUNC__ 1088 #define __FUNC__ "MatGetLocalSize_MPIAIJ" 1089 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1090 { 1091 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1092 1093 PetscFunctionBegin; 1094 if (m) *m = mat->m; 1095 if (n) *n = mat->n; 1096 PetscFunctionReturn(0); 1097 } 1098 1099 #undef __FUNC__ 1100 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" 1101 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1102 { 1103 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1104 1105 PetscFunctionBegin; 1106 *m = mat->rstart; *n = mat->rend; 1107 PetscFunctionReturn(0); 1108 } 1109 1110 #undef __FUNC__ 1111 #define __FUNC__ "MatGetRow_MPIAIJ" 1112 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1113 { 1114 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1115 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1116 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1117 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1118 int *cmap, *idx_p; 1119 1120 PetscFunctionBegin; 1121 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1122 mat->getrowactive = PETSC_TRUE; 1123 1124 if (!mat->rowvalues && (idx || v)) { 1125 /* 1126 allocate enough space to hold information from the longest row. 1127 */ 1128 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1129 int max = 1,m = mat->m,tmp; 1130 for ( i=0; i<m; i++ ) { 1131 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1132 if (max < tmp) { max = tmp; } 1133 } 1134 mat->rowvalues = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 1135 mat->rowindices = (int *) (mat->rowvalues + max); 1136 } 1137 1138 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows") 1139 lrow = row - rstart; 1140 1141 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1142 if (!v) {pvA = 0; pvB = 0;} 1143 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1144 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1145 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1146 nztot = nzA + nzB; 1147 1148 cmap = mat->garray; 1149 if (v || idx) { 1150 if (nztot) { 1151 /* Sort by increasing column numbers, assuming A and B already sorted */ 1152 int imark = -1; 1153 if (v) { 1154 *v = v_p = mat->rowvalues; 1155 for ( i=0; i<nzB; i++ ) { 1156 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1157 else break; 1158 } 1159 imark = i; 1160 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1161 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1162 } 1163 if (idx) { 1164 *idx = idx_p = mat->rowindices; 1165 if (imark > -1) { 1166 for ( i=0; i<imark; i++ ) { 1167 idx_p[i] = cmap[cworkB[i]]; 1168 } 1169 } else { 1170 for ( i=0; i<nzB; i++ ) { 1171 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1172 else break; 1173 } 1174 imark = i; 1175 } 1176 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 1177 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 1178 } 1179 } else { 1180 if (idx) *idx = 0; 1181 if (v) *v = 0; 1182 } 1183 } 1184 *nz = nztot; 1185 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1186 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1187 PetscFunctionReturn(0); 1188 } 1189 1190 #undef __FUNC__ 1191 #define __FUNC__ "MatRestoreRow_MPIAIJ" 1192 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1193 { 1194 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1195 1196 PetscFunctionBegin; 1197 if (aij->getrowactive == PETSC_FALSE) { 1198 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1199 } 1200 aij->getrowactive = PETSC_FALSE; 1201 PetscFunctionReturn(0); 1202 } 1203 1204 #undef __FUNC__ 1205 #define __FUNC__ "MatNorm_MPIAIJ" 1206 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1207 { 1208 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1209 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1210 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1211 double sum = 0.0; 1212 Scalar *v; 1213 1214 PetscFunctionBegin; 1215 if (aij->size == 1) { 1216 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1217 } else { 1218 if (type == NORM_FROBENIUS) { 1219 v = amat->a; 1220 for (i=0; i<amat->nz; i++ ) { 1221 #if defined(PETSC_USE_COMPLEX) 1222 sum += PetscReal(PetscConj(*v)*(*v)); v++; 1223 #else 1224 sum += (*v)*(*v); v++; 1225 #endif 1226 } 1227 v = bmat->a; 1228 for (i=0; i<bmat->nz; i++ ) { 1229 #if defined(PETSC_USE_COMPLEX) 1230 sum += PetscReal(PetscConj(*v)*(*v)); v++; 1231 #else 1232 sum += (*v)*(*v); v++; 1233 #endif 1234 } 1235 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1236 *norm = sqrt(*norm); 1237 } else if (type == NORM_1) { /* max column norm */ 1238 double *tmp, *tmp2; 1239 int *jj, *garray = aij->garray; 1240 tmp = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp); 1241 tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp2); 1242 ierr = PetscMemzero(tmp,aij->N*sizeof(double));CHKERRQ(ierr); 1243 *norm = 0.0; 1244 v = amat->a; jj = amat->j; 1245 for ( j=0; j<amat->nz; j++ ) { 1246 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1247 } 1248 v = bmat->a; jj = bmat->j; 1249 for ( j=0; j<bmat->nz; j++ ) { 1250 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1251 } 1252 ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1253 for ( j=0; j<aij->N; j++ ) { 1254 if (tmp2[j] > *norm) *norm = tmp2[j]; 1255 } 1256 ierr = PetscFree(tmp);CHKERRQ(ierr); 1257 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1258 } else if (type == NORM_INFINITY) { /* max row norm */ 1259 double ntemp = 0.0; 1260 for ( j=0; j<amat->m; j++ ) { 1261 v = amat->a + amat->i[j] + shift; 1262 sum = 0.0; 1263 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1264 sum += PetscAbsScalar(*v); v++; 1265 } 1266 v = bmat->a + bmat->i[j] + shift; 1267 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1268 sum += PetscAbsScalar(*v); v++; 1269 } 1270 if (sum > ntemp) ntemp = sum; 1271 } 1272 ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr); 1273 } else { 1274 SETERRQ(PETSC_ERR_SUP,0,"No support for two norm"); 1275 } 1276 } 1277 PetscFunctionReturn(0); 1278 } 1279 1280 #undef __FUNC__ 1281 #define __FUNC__ "MatTranspose_MPIAIJ" 1282 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1283 { 1284 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1285 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1286 int ierr,shift = Aloc->indexshift; 1287 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1288 Mat B; 1289 Scalar *array; 1290 1291 PetscFunctionBegin; 1292 if (matout == PETSC_NULL && M != N) { 1293 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1294 } 1295 1296 ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1297 1298 /* copy over the A part */ 1299 Aloc = (Mat_SeqAIJ*) a->A->data; 1300 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1301 row = a->rstart; 1302 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1303 for ( i=0; i<m; i++ ) { 1304 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1305 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1306 } 1307 aj = Aloc->j; 1308 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1309 1310 /* copy over the B part */ 1311 Aloc = (Mat_SeqAIJ*) a->B->data; 1312 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1313 row = a->rstart; 1314 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) );CHKPTRQ(cols); 1315 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1316 for ( i=0; i<m; i++ ) { 1317 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1318 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1319 } 1320 ierr = PetscFree(ct);CHKERRQ(ierr); 1321 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1322 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1323 if (matout != PETSC_NULL) { 1324 *matout = B; 1325 } else { 1326 PetscOps *Abops; 1327 struct _MatOps *Aops; 1328 1329 /* This isn't really an in-place transpose .... but free data structures from a */ 1330 ierr = PetscFree(a->rowners);CHKERRQ(ierr); 1331 ierr = MatDestroy(a->A);CHKERRQ(ierr); 1332 ierr = MatDestroy(a->B);CHKERRQ(ierr); 1333 #if defined (PETSC_USE_CTABLE) 1334 if (a->colmap) TableDelete(a->colmap); 1335 #else 1336 if (a->colmap) {ierr = PetscFree(a->colmap);CHKERRQ(ierr);} 1337 #endif 1338 if (a->garray) {ierr = PetscFree(a->garray);CHKERRQ(ierr);} 1339 if (a->lvec) VecDestroy(a->lvec); 1340 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1341 ierr = PetscFree(a);CHKERRQ(ierr); 1342 1343 /* 1344 This is horrible, horrible code. We need to keep the 1345 A pointers for the bops and ops but copy everything 1346 else from C. 1347 */ 1348 Abops = A->bops; 1349 Aops = A->ops; 1350 ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr); 1351 A->bops = Abops; 1352 A->ops = Aops; 1353 PetscHeaderDestroy(B); 1354 } 1355 PetscFunctionReturn(0); 1356 } 1357 1358 #undef __FUNC__ 1359 #define __FUNC__ "MatDiagonalScale_MPIAIJ" 1360 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1361 { 1362 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1363 Mat a = aij->A, b = aij->B; 1364 int ierr,s1,s2,s3; 1365 1366 PetscFunctionBegin; 1367 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1368 if (rr) { 1369 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1370 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 1371 /* Overlap communication with computation. */ 1372 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1373 } 1374 if (ll) { 1375 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1376 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1377 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 1378 } 1379 /* scale the diagonal block */ 1380 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1381 1382 if (rr) { 1383 /* Do a scatter end and then right scale the off-diagonal block */ 1384 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr); 1385 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 1386 } 1387 1388 PetscFunctionReturn(0); 1389 } 1390 1391 1392 #undef __FUNC__ 1393 #define __FUNC__ "MatPrintHelp_MPIAIJ" 1394 int MatPrintHelp_MPIAIJ(Mat A) 1395 { 1396 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1397 int ierr; 1398 1399 PetscFunctionBegin; 1400 if (!a->rank) { 1401 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1402 } 1403 PetscFunctionReturn(0); 1404 } 1405 1406 #undef __FUNC__ 1407 #define __FUNC__ "MatGetBlockSize_MPIAIJ" 1408 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1409 { 1410 PetscFunctionBegin; 1411 *bs = 1; 1412 PetscFunctionReturn(0); 1413 } 1414 #undef __FUNC__ 1415 #define __FUNC__ "MatSetUnfactored_MPIAIJ" 1416 int MatSetUnfactored_MPIAIJ(Mat A) 1417 { 1418 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1419 int ierr; 1420 1421 PetscFunctionBegin; 1422 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1423 PetscFunctionReturn(0); 1424 } 1425 1426 #undef __FUNC__ 1427 #define __FUNC__ "MatEqual_MPIAIJ" 1428 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag) 1429 { 1430 Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data; 1431 Mat a, b, c, d; 1432 PetscTruth flg; 1433 int ierr; 1434 1435 PetscFunctionBegin; 1436 if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1437 a = matA->A; b = matA->B; 1438 c = matB->A; d = matB->B; 1439 1440 ierr = MatEqual(a, c, &flg);CHKERRQ(ierr); 1441 if (flg == PETSC_TRUE) { 1442 ierr = MatEqual(b, d, &flg);CHKERRQ(ierr); 1443 } 1444 ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr); 1445 PetscFunctionReturn(0); 1446 } 1447 1448 #undef __FUNC__ 1449 #define __FUNC__ "MatCopy_MPIAIJ" 1450 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1451 { 1452 int ierr; 1453 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1454 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1455 1456 PetscFunctionBegin; 1457 if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) { 1458 /* because of the column compression in the off-processor part of the matrix a->B, 1459 the number of columns in a->B and b->B may be different, hence we cannot call 1460 the MatCopy() directly on the two parts. If need be, we can provide a more 1461 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1462 then copying the submatrices */ 1463 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1464 } else { 1465 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1466 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1467 } 1468 PetscFunctionReturn(0); 1469 } 1470 1471 extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1472 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1473 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1474 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatReuse,Mat **); 1475 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatReuse,Mat *); 1476 1477 /* -------------------------------------------------------------------*/ 1478 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1479 MatGetRow_MPIAIJ, 1480 MatRestoreRow_MPIAIJ, 1481 MatMult_MPIAIJ, 1482 MatMultAdd_MPIAIJ, 1483 MatMultTrans_MPIAIJ, 1484 MatMultTransAdd_MPIAIJ, 1485 0, 1486 0, 1487 0, 1488 0, 1489 0, 1490 0, 1491 MatRelax_MPIAIJ, 1492 MatTranspose_MPIAIJ, 1493 MatGetInfo_MPIAIJ, 1494 MatEqual_MPIAIJ, 1495 MatGetDiagonal_MPIAIJ, 1496 MatDiagonalScale_MPIAIJ, 1497 MatNorm_MPIAIJ, 1498 MatAssemblyBegin_MPIAIJ, 1499 MatAssemblyEnd_MPIAIJ, 1500 0, 1501 MatSetOption_MPIAIJ, 1502 MatZeroEntries_MPIAIJ, 1503 MatZeroRows_MPIAIJ, 1504 0, 1505 0, 1506 0, 1507 0, 1508 MatGetSize_MPIAIJ, 1509 MatGetLocalSize_MPIAIJ, 1510 MatGetOwnershipRange_MPIAIJ, 1511 0, 1512 0, 1513 0, 1514 0, 1515 MatDuplicate_MPIAIJ, 1516 0, 1517 0, 1518 0, 1519 0, 1520 0, 1521 MatGetSubMatrices_MPIAIJ, 1522 MatIncreaseOverlap_MPIAIJ, 1523 MatGetValues_MPIAIJ, 1524 MatCopy_MPIAIJ, 1525 MatPrintHelp_MPIAIJ, 1526 MatScale_MPIAIJ, 1527 0, 1528 0, 1529 0, 1530 MatGetBlockSize_MPIAIJ, 1531 0, 1532 0, 1533 0, 1534 0, 1535 MatFDColoringCreate_MPIAIJ, 1536 0, 1537 MatSetUnfactored_MPIAIJ, 1538 0, 1539 0, 1540 MatGetSubMatrix_MPIAIJ, 1541 0, 1542 0, 1543 MatGetMaps_Petsc}; 1544 1545 /* ----------------------------------------------------------------------------------------*/ 1546 1547 EXTERN_C_BEGIN 1548 #undef __FUNC__ 1549 #define __FUNC__ "MatStoreValues_MPIAIJ" 1550 int MatStoreValues_MPIAIJ(Mat mat) 1551 { 1552 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1553 int ierr; 1554 1555 PetscFunctionBegin; 1556 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1557 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1558 PetscFunctionReturn(0); 1559 } 1560 EXTERN_C_END 1561 1562 EXTERN_C_BEGIN 1563 #undef __FUNC__ 1564 #define __FUNC__ "MatRetrieveValues_MPIAIJ" 1565 int MatRetrieveValues_MPIAIJ(Mat mat) 1566 { 1567 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1568 int ierr; 1569 1570 PetscFunctionBegin; 1571 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1572 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1573 PetscFunctionReturn(0); 1574 } 1575 EXTERN_C_END 1576 1577 #include "pc.h" 1578 EXTERN_C_BEGIN 1579 extern int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *); 1580 EXTERN_C_END 1581 1582 #undef __FUNC__ 1583 #define __FUNC__ "MatCreateMPIAIJ" 1584 /*@C 1585 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1586 (the default parallel PETSc format). For good matrix assembly performance 1587 the user should preallocate the matrix storage by setting the parameters 1588 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1589 performance can be increased by more than a factor of 50. 1590 1591 Collective on MPI_Comm 1592 1593 Input Parameters: 1594 + comm - MPI communicator 1595 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1596 This value should be the same as the local size used in creating the 1597 y vector for the matrix-vector product y = Ax. 1598 . n - This value should be the same as the local size used in creating the 1599 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 1600 calculated if N is given) For square matrices n is almost always m. 1601 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1602 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1603 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 1604 (same value is used for all local rows) 1605 . d_nnz - array containing the number of nonzeros in the various rows of the 1606 DIAGONAL portion of the local submatrix (possibly different for each row) 1607 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 1608 The size of this array is equal to the number of local rows, i.e 'm'. 1609 You must leave room for the diagonal entry even if it is zero. 1610 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 1611 submatrix (same value is used for all local rows). 1612 - o_nnz - array containing the number of nonzeros in the various rows of the 1613 OFF-DIAGONAL portion of the local submatrix (possibly different for 1614 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 1615 structure. The size of this array is equal to the number 1616 of local rows, i.e 'm'. 1617 1618 Output Parameter: 1619 . A - the matrix 1620 1621 Notes: 1622 m,n,M,N parameters specify the size of the matrix, and its partitioning across 1623 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 1624 storage requirements for this matrix. 1625 1626 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 1627 processor than it must be used on all processors that share the object for 1628 that argument. 1629 1630 The AIJ format (also called the Yale sparse matrix format or 1631 compressed row storage), is fully compatible with standard Fortran 77 1632 storage. That is, the stored row and column indices can begin at 1633 either one (as in Fortran) or zero. See the users manual for details. 1634 1635 The user MUST specify either the local or global matrix dimensions 1636 (possibly both). 1637 1638 The parallel matrix is partitioned such that the first m0 rows belong to 1639 process 0, the next m1 rows belong to process 1, the next m2 rows belong 1640 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 1641 1642 The DIAGONAL portion of the local submatrix of a processor can be defined 1643 as the submatrix which is obtained by extraction the part corresponding 1644 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 1645 first row that belongs to the processor, and r2 is the last row belonging 1646 to the this processor. This is a square mxm matrix. The remaining portion 1647 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 1648 1649 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 1650 1651 By default, this format uses inodes (identical nodes) when possible. 1652 We search for consecutive rows with the same nonzero structure, thereby 1653 reusing matrix information to achieve increased efficiency. 1654 1655 Options Database Keys: 1656 + -mat_aij_no_inode - Do not use inodes 1657 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 1658 - -mat_aij_oneindex - Internally use indexing starting at 1 1659 rather than 0. Note that when calling MatSetValues(), 1660 the user still MUST index entries starting at 0! 1661 . -mat_mpi - use the parallel matrix data structures even on one processor 1662 (defaults to using SeqBAIJ format on one processor) 1663 . -mat_mpi - use the parallel matrix data structures even on one processor 1664 (defaults to using SeqAIJ format on one processor) 1665 1666 1667 Example usage: 1668 1669 Consider the following 8x8 matrix with 34 non-zero values, that is 1670 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 1671 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 1672 as follows: 1673 1674 .vb 1675 1 2 0 | 0 3 0 | 0 4 1676 Proc0 0 5 6 | 7 0 0 | 8 0 1677 9 0 10 | 11 0 0 | 12 0 1678 ------------------------------------- 1679 13 0 14 | 15 16 17 | 0 0 1680 Proc1 0 18 0 | 19 20 21 | 0 0 1681 0 0 0 | 22 23 0 | 24 0 1682 ------------------------------------- 1683 Proc2 25 26 27 | 0 0 28 | 29 0 1684 30 0 0 | 31 32 33 | 0 34 1685 .ve 1686 1687 This can be represented as a collection of submatrices as: 1688 1689 .vb 1690 A B C 1691 D E F 1692 G H I 1693 .ve 1694 1695 Where the submatrices A,B,C are owned by proc0, D,E,F are 1696 owned by proc1, G,H,I are owned by proc2. 1697 1698 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 1699 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 1700 The 'M','N' parameters are 8,8, and have the same values on all procs. 1701 1702 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 1703 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 1704 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 1705 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 1706 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 1707 matrix, ans [DF] as another SeqAIJ matrix. 1708 1709 When d_nz, o_nz parameters are specified, d_nz storage elements are 1710 allocated for every row of the local diagonal submatrix, and o_nz 1711 storage locations are allocated for every row of the OFF-DIAGONAL submat. 1712 One way to choose d_nz and o_nz is to use the max nonzerors per local 1713 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 1714 In this case, the values of d_nz,o_nz are: 1715 .vb 1716 proc0 : dnz = 2, o_nz = 2 1717 proc1 : dnz = 3, o_nz = 2 1718 proc2 : dnz = 1, o_nz = 4 1719 .ve 1720 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 1721 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 1722 for proc3. i.e we are using 12+15+10=37 storage locations to store 1723 34 values. 1724 1725 When d_nnz, o_nnz parameters are specified, the storage is specified 1726 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 1727 In the above case the values for d_nnz,o_nnz are: 1728 .vb 1729 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 1730 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 1731 proc2: d_nnz = [1,1] and o_nnz = [4,4] 1732 .ve 1733 Here the space allocated is sum of all the above values i.e 34, and 1734 hence pre-allocation is perfect. 1735 1736 Level: intermediate 1737 1738 .keywords: matrix, aij, compressed row, sparse, parallel 1739 1740 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1741 @*/ 1742 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1743 { 1744 Mat B; 1745 Mat_MPIAIJ *b; 1746 int ierr, i,size,flag1 = 0, flag2 = 0; 1747 1748 PetscFunctionBegin; 1749 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1750 ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1);CHKERRQ(ierr); 1751 ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr); 1752 if (!flag1 && !flag2 && size == 1) { 1753 if (M == PETSC_DECIDE) M = m; 1754 if (N == PETSC_DECIDE) N = n; 1755 ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A);CHKERRQ(ierr); 1756 PetscFunctionReturn(0); 1757 } 1758 1759 *A = 0; 1760 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView); 1761 PLogObjectCreate(B); 1762 B->data = (void *) (b = PetscNew(Mat_MPIAIJ));CHKPTRQ(b); 1763 ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr); 1764 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1765 B->ops->destroy = MatDestroy_MPIAIJ; 1766 B->ops->view = MatView_MPIAIJ; 1767 B->factor = 0; 1768 B->assembled = PETSC_FALSE; 1769 B->mapping = 0; 1770 1771 B->insertmode = NOT_SET_VALUES; 1772 b->size = size; 1773 ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr); 1774 1775 if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 1776 SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1777 } 1778 1779 ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr); 1780 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 1781 b->m = m; B->m = m; 1782 b->n = n; B->n = n; 1783 b->N = N; B->N = N; 1784 b->M = M; B->M = M; 1785 1786 /* the information in the maps duplicates the information computed below, eventually 1787 we should remove the duplicate information that is not contained in the maps */ 1788 ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr); 1789 ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr); 1790 1791 /* build local table of row and column ownerships */ 1792 b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners); 1793 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1794 b->cowners = b->rowners + b->size + 2; 1795 ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1796 b->rowners[0] = 0; 1797 for ( i=2; i<=b->size; i++ ) { 1798 b->rowners[i] += b->rowners[i-1]; 1799 } 1800 b->rstart = b->rowners[b->rank]; 1801 b->rend = b->rowners[b->rank+1]; 1802 ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1803 b->cowners[0] = 0; 1804 for ( i=2; i<=b->size; i++ ) { 1805 b->cowners[i] += b->cowners[i-1]; 1806 } 1807 b->cstart = b->cowners[b->rank]; 1808 b->cend = b->cowners[b->rank+1]; 1809 1810 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1811 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 1812 PLogObjectParent(B,b->A); 1813 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1814 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 1815 PLogObjectParent(B,b->B); 1816 1817 /* build cache for off array entries formed */ 1818 ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr); 1819 b->donotstash = 0; 1820 b->colmap = 0; 1821 b->garray = 0; 1822 b->roworiented = 1; 1823 1824 /* stuff used for matrix vector multiply */ 1825 b->lvec = 0; 1826 b->Mvctx = 0; 1827 1828 /* stuff for MatGetRow() */ 1829 b->rowindices = 0; 1830 b->rowvalues = 0; 1831 b->getrowactive = PETSC_FALSE; 1832 1833 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C", 1834 "MatStoreValues_MPIAIJ", 1835 (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1836 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C", 1837 "MatRetrieveValues_MPIAIJ", 1838 (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1839 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C", 1840 "MatGetDiagonalBlock_MPIAIJ", 1841 (void*)MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 1842 *A = B; 1843 PetscFunctionReturn(0); 1844 } 1845 1846 #undef __FUNC__ 1847 #define __FUNC__ "MatDuplicate_MPIAIJ" 1848 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1849 { 1850 Mat mat; 1851 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1852 int ierr, len=0, flg; 1853 1854 PetscFunctionBegin; 1855 *newmat = 0; 1856 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView); 1857 PLogObjectCreate(mat); 1858 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ));CHKPTRQ(a); 1859 ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1860 mat->ops->destroy = MatDestroy_MPIAIJ; 1861 mat->ops->view = MatView_MPIAIJ; 1862 mat->factor = matin->factor; 1863 mat->assembled = PETSC_TRUE; 1864 mat->insertmode = NOT_SET_VALUES; 1865 1866 a->m = mat->m = oldmat->m; 1867 a->n = mat->n = oldmat->n; 1868 a->M = mat->M = oldmat->M; 1869 a->N = mat->N = oldmat->N; 1870 1871 a->rstart = oldmat->rstart; 1872 a->rend = oldmat->rend; 1873 a->cstart = oldmat->cstart; 1874 a->cend = oldmat->cend; 1875 a->size = oldmat->size; 1876 a->rank = oldmat->rank; 1877 a->donotstash = oldmat->donotstash; 1878 a->roworiented = oldmat->roworiented; 1879 a->rowindices = 0; 1880 a->rowvalues = 0; 1881 a->getrowactive = PETSC_FALSE; 1882 1883 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners); 1884 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1885 a->cowners = a->rowners + a->size + 2; 1886 ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr); 1887 ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 1888 if (oldmat->colmap) { 1889 #if defined (PETSC_USE_CTABLE) 1890 ierr = TableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 1891 #else 1892 a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1893 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1894 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));CHKERRQ(ierr); 1895 #endif 1896 } else a->colmap = 0; 1897 if (oldmat->garray) { 1898 len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 1899 a->garray = (int *) PetscMalloc((len+1)*sizeof(int));CHKPTRQ(a->garray); 1900 PLogObjectMemory(mat,len*sizeof(int)); 1901 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); } 1902 } else a->garray = 0; 1903 1904 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 1905 PLogObjectParent(mat,a->lvec); 1906 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 1907 PLogObjectParent(mat,a->Mvctx); 1908 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 1909 PLogObjectParent(mat,a->A); 1910 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 1911 PLogObjectParent(mat,a->B); 1912 ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 1913 if (flg) { 1914 ierr = MatPrintHelp(mat);CHKERRQ(ierr); 1915 } 1916 ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1917 *newmat = mat; 1918 PetscFunctionReturn(0); 1919 } 1920 1921 #include "sys.h" 1922 1923 #undef __FUNC__ 1924 #define __FUNC__ "MatLoad_MPIAIJ" 1925 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 1926 { 1927 Mat A; 1928 Scalar *vals,*svals; 1929 MPI_Comm comm = ((PetscObject)viewer)->comm; 1930 MPI_Status status; 1931 int i, nz, ierr, j,rstart, rend, fd; 1932 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1933 int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1934 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 1935 1936 PetscFunctionBegin; 1937 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1938 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 1939 if (!rank) { 1940 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1941 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 1942 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 1943 if (header[3] < 0) { 1944 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ"); 1945 } 1946 } 1947 1948 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1949 M = header[1]; N = header[2]; 1950 /* determine ownership of all rows */ 1951 m = M/size + ((M % size) > rank); 1952 rowners = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners); 1953 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1954 rowners[0] = 0; 1955 for ( i=2; i<=size; i++ ) { 1956 rowners[i] += rowners[i-1]; 1957 } 1958 rstart = rowners[rank]; 1959 rend = rowners[rank+1]; 1960 1961 /* distribute row lengths to all processors */ 1962 ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens); 1963 offlens = ourlens + (rend-rstart); 1964 if (!rank) { 1965 rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths); 1966 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1967 sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts); 1968 for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1969 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1970 ierr = PetscFree(sndcounts);CHKERRQ(ierr); 1971 } else { 1972 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr); 1973 } 1974 1975 if (!rank) { 1976 /* calculate the number of nonzeros on each processor */ 1977 procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz); 1978 ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 1979 for ( i=0; i<size; i++ ) { 1980 for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1981 procsnz[i] += rowlengths[j]; 1982 } 1983 } 1984 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1985 1986 /* determine max buffer needed and allocate it */ 1987 maxnz = 0; 1988 for ( i=0; i<size; i++ ) { 1989 maxnz = PetscMax(maxnz,procsnz[i]); 1990 } 1991 cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols); 1992 1993 /* read in my part of the matrix column indices */ 1994 nz = procsnz[0]; 1995 mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols); 1996 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 1997 1998 /* read in every one elses and ship off */ 1999 for ( i=1; i<size; i++ ) { 2000 nz = procsnz[i]; 2001 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2002 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2003 } 2004 ierr = PetscFree(cols);CHKERRQ(ierr); 2005 } else { 2006 /* determine buffer space needed for message */ 2007 nz = 0; 2008 for ( i=0; i<m; i++ ) { 2009 nz += ourlens[i]; 2010 } 2011 mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols); 2012 2013 /* receive message of column indices*/ 2014 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2015 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2016 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2017 } 2018 2019 /* determine column ownership if matrix is not square */ 2020 if (N != M) { 2021 n = N/size + ((N % size) > rank); 2022 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2023 cstart = cend - n; 2024 } else { 2025 cstart = rstart; 2026 cend = rend; 2027 n = cend - cstart; 2028 } 2029 2030 /* loop over local rows, determining number of off diagonal entries */ 2031 ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr); 2032 jj = 0; 2033 for ( i=0; i<m; i++ ) { 2034 for ( j=0; j<ourlens[i]; j++ ) { 2035 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2036 jj++; 2037 } 2038 } 2039 2040 /* create our matrix */ 2041 for ( i=0; i<m; i++ ) { 2042 ourlens[i] -= offlens[i]; 2043 } 2044 ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 2045 A = *newmat; 2046 ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); 2047 for ( i=0; i<m; i++ ) { 2048 ourlens[i] += offlens[i]; 2049 } 2050 2051 if (!rank) { 2052 vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals); 2053 2054 /* read in my part of the matrix numerical values */ 2055 nz = procsnz[0]; 2056 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2057 2058 /* insert into matrix */ 2059 jj = rstart; 2060 smycols = mycols; 2061 svals = vals; 2062 for ( i=0; i<m; i++ ) { 2063 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2064 smycols += ourlens[i]; 2065 svals += ourlens[i]; 2066 jj++; 2067 } 2068 2069 /* read in other processors and ship out */ 2070 for ( i=1; i<size; i++ ) { 2071 nz = procsnz[i]; 2072 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2073 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2074 } 2075 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2076 } else { 2077 /* receive numeric values */ 2078 vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals); 2079 2080 /* receive message of values*/ 2081 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2082 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2083 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2084 2085 /* insert into matrix */ 2086 jj = rstart; 2087 smycols = mycols; 2088 svals = vals; 2089 for ( i=0; i<m; i++ ) { 2090 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2091 smycols += ourlens[i]; 2092 svals += ourlens[i]; 2093 jj++; 2094 } 2095 } 2096 ierr = PetscFree(ourlens);CHKERRQ(ierr); 2097 ierr = PetscFree(vals);CHKERRQ(ierr); 2098 ierr = PetscFree(mycols);CHKERRQ(ierr); 2099 ierr = PetscFree(rowners);CHKERRQ(ierr); 2100 2101 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2102 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2103 PetscFunctionReturn(0); 2104 } 2105 2106 #undef __FUNC__ 2107 #define __FUNC__ "MatGetSubMatrix_MPIAIJ" 2108 /* 2109 Not great since it makes two copies of the submatrix, first an SeqAIJ 2110 in local and then by concatenating the local matrices the end result. 2111 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2112 */ 2113 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat) 2114 { 2115 int ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j; 2116 Mat *local,M, Mreuse; 2117 Scalar *vwork,*aa; 2118 MPI_Comm comm = mat->comm; 2119 Mat_SeqAIJ *aij; 2120 int *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend; 2121 2122 PetscFunctionBegin; 2123 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2124 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2125 2126 if (call == MAT_REUSE_MATRIX) { 2127 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2128 if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse"); 2129 local = &Mreuse; 2130 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2131 } else { 2132 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2133 Mreuse = *local; 2134 ierr = PetscFree(local);CHKERRQ(ierr); 2135 } 2136 2137 /* 2138 m - number of local rows 2139 n - number of columns (same on all processors) 2140 rstart - first row in new global matrix generated 2141 */ 2142 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2143 if (call == MAT_INITIAL_MATRIX) { 2144 aij = (Mat_SeqAIJ *) (Mreuse)->data; 2145 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 2146 ii = aij->i; 2147 jj = aij->j; 2148 2149 /* 2150 Determine the number of non-zeros in the diagonal and off-diagonal 2151 portions of the matrix in order to do correct preallocation 2152 */ 2153 2154 /* first get start and end of "diagonal" columns */ 2155 if (csize == PETSC_DECIDE) { 2156 nlocal = n/size + ((n % size) > rank); 2157 } else { 2158 nlocal = csize; 2159 } 2160 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2161 rstart = rend - nlocal; 2162 if (rank == size - 1 && rend != n) { 2163 SETERRQ(1,1,"Local column sizes do not add up to total number of columns"); 2164 } 2165 2166 /* next, compute all the lengths */ 2167 dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens); 2168 olens = dlens + m; 2169 for ( i=0; i<m; i++ ) { 2170 jend = ii[i+1] - ii[i]; 2171 olen = 0; 2172 dlen = 0; 2173 for ( j=0; j<jend; j++ ) { 2174 if ( *jj < rstart || *jj >= rend) olen++; 2175 else dlen++; 2176 jj++; 2177 } 2178 olens[i] = olen; 2179 dlens[i] = dlen; 2180 } 2181 ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 2182 ierr = PetscFree(dlens);CHKERRQ(ierr); 2183 } else { 2184 int ml,nl; 2185 2186 M = *newmat; 2187 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2188 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request"); 2189 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2190 /* 2191 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2192 rather than the slower MatSetValues(). 2193 */ 2194 M->was_assembled = PETSC_TRUE; 2195 M->assembled = PETSC_FALSE; 2196 } 2197 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 2198 aij = (Mat_SeqAIJ *) (Mreuse)->data; 2199 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 2200 ii = aij->i; 2201 jj = aij->j; 2202 aa = aij->a; 2203 for (i=0; i<m; i++) { 2204 row = rstart + i; 2205 nz = ii[i+1] - ii[i]; 2206 cwork = jj; jj += nz; 2207 vwork = aa; aa += nz; 2208 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 2209 } 2210 2211 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2212 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2213 *newmat = M; 2214 2215 /* save submatrix used in processor for next request */ 2216 if (call == MAT_INITIAL_MATRIX) { 2217 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2218 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2219 } 2220 2221 PetscFunctionReturn(0); 2222 } 2223