/*$Id: mpiaij.c,v 1.306 1999/10/24 14:02:16 bsmith Exp bsmith $*/

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

extern int MatSetUpMultiply_MPIAIJ(Mat);
extern int DisAssemble_MPIAIJ(Mat);
extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
extern int MatPrintHelp_SeqAIJ(Mat);

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. When PETSC_USE_CTABLE is defined this is scalable
   at a slightly higher hash table cost; without it, it is not scalable
   (each processor has an order-N integer array) but access is fast.
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(aij->n/5,&aij->colmap);CHKERRQ(ierr);
  for ( i=0; i<n; i++ ){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,aij->N*sizeof(int));CHKERRQ(ierr);
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
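/*
   For illustration (hypothetical values): if garray = {3,7,12}, then with
   PETSC_USE_CTABLE the table maps 4->1, 8->2, and 13->3, while without it
   colmap[3] = 1, colmap[7] = 2, and colmap[12] = 3 (all other entries are
   zero). In both cases the stored value is the local index plus one, so
   that a lookup result of zero can signal "column not present".
*/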
#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    /* start the linear scan from the point located by the bisection above */ \
    for ( _i=low; _i<nrow; _i++ ) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i]  = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
      new_a = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
      new_j = (int *) (new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
      for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                         len*sizeof(Scalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = 1; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    /* start the linear scan from the point located by the bisection above */ \
    for ( _i=low; _i<nrow; _i++ ) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i]  = value; \
        goto b_noinsert; \
      } \
    } \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
      new_a = (Scalar *) PetscMalloc( len );CHKPTRQ(new_a); \
      new_j = (int *) (new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
      for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                         len*sizeof(Scalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
      ierr = PetscFree(b->a);CHKERRQ(ierr); \
      if (!b->singlemalloc) { \
        ierr = PetscFree(b->i);CHKERRQ(ierr); \
        ierr = PetscFree(b->j);CHKERRQ(ierr); \
      } \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = 1; \
 \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow; \
}
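/*
   MatSetValues_MPIAIJ() routes each (row,col) entry by ownership: rows
   outside [rstart,rend) are stashed for later communication, columns in
   [cstart,cend) go into the diagonal block A, and all other columns go
   into the off-diagonal block B (through the colmap/garray translation
   once the matrix has been assembled). For example (hypothetical numbers),
   with rstart=3, rend=6, cstart=3, cend=6, an entry at global (4,7) lands
   in local row 1 of B, while (4,5) becomes entry (1,2) of A.
*/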
#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIAIJ"
int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Scalar     value;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        roworiented = aij->roworiented;

  /* Some variables required in the macros */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
  int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  Scalar     *aa = a->a;

  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
  int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  Scalar     *ba = b->a;

  int        *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
  int        nonew = a->nonew,shift = a->indexshift;
  Scalar     *ap;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ *) B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIAIJ"
int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr,nstash,reallocs;
  InsertMode addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted while others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        i,j,rstart,ncols,n,ierr,flg;
  int        *row,*col,other_disassembled;
  Scalar     *val;
  InsertMode addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for ( i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for ( j=i,rstart=row[j]; j<n; j++ ) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
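  /*
     (Note on the reduction below: MPI_PROD over the was_assembled flags is 1
     only if every process is still assembled; if any process has disassembled,
     the product is 0 and the remaining assembled processes disassemble too.)
  */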
  /*
     if the nonzero structure of submatrix B cannot change, then we know that
     no processor disassembled, and thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*) aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIAIJ"
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIAIJ"
int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ  *l = (Mat_MPIAIJ *) A->data;
  int         i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int         *procs,*nprocs,j,found,idx,nsends,*work,row;
  int         nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int         *rvalues,tag = A->tag,count,base,slen,n,*source;
  int         *lens,imdex,*lrows,*values,rstart=l->rstart;
  MPI_Comm    comm = A->comm;
  MPI_Request *send_waits,*recv_waits;
  MPI_Status  recv_status,*send_status;
  IS          istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(nprocs);
  ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs  = nprocs + size;
  owner  = (int *) PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note */
  for ( i=0; i<N; i++ ) {
    idx   = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work = (int *) PetscMalloc( 2*size*sizeof(int) );CHKPTRQ(work);
  ierr = MPI_Allreduce( nprocs, work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nrecvs = work[size+rank];
  nmax   = work[rank];
  ierr   = PetscFree(work);CHKERRQ(ierr);

  /* post receives: */
  rvalues    = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) );CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int *) PetscMalloc( (size+1)*sizeof(int) );CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) );CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) );CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory, causing lots of mallocs and
     slowing things down.
     Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for ( i = 0; i < slen; i++ ) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr        = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMult_MPIAIJ"
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != a->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A (%d) and xx (%d)",a->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIAIJ"
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIAIJ"
int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtrans)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in yy until the next line, which is true for my implementation
     but is perhaps not always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIAIJ"
int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in zz until the next line, which is true for my implementation
     but is perhaps not always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIAIJ"
int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  PetscFunctionBegin;
  if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatScale_MPIAIJ"
int MatScale_MPIAIJ(Scalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIAIJ"
int MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  PetscFunctionBegin;
  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
  }
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
#if defined(PETSC_USE_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ_Binary"
int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworSocket"
int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C = (Mat_SeqAIJ*)aij->A->data;
  int        ierr, format,shift = C->indexshift,rank = aij->rank,size = aij->size;
  PetscTruth isdraw,isascii,flg;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  if (isascii) {
    ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
      if (flg) {
        ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
                 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      } else {
        ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
                 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = ViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    int        M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
    Scalar     *a;

    if (!rank) {
      ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*) aij->A->data;
    m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*) aij->B->data;
    m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) );CHKPTRQ(cols);
    for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the Draw object
    */
    if (!rank) {
      Viewer sviewer;
      ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
      ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    }
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ"
int MatView_MPIAIJ(Mat mat,Viewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
  if (isascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

/*
   This has to provide several versions.

    1) a) use only local smoothing, updating outer values only once
       b) local smoothing, updating outer values each inner iteration
    2) coloring, updating outer values between colors
*/
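/*
   A sketch of the local update applied by each sweep below (assuming
   SPARSEDENSEMDOT(sum,xs,v,idx,n) subtracts the sparse dot product,
   sum -= v[k]*xs[idx[k]], over the row): with sum = b_i - sum_j a_ij x_j
   accumulated over both the diagonal block A and the off-diagonal block B
   (the latter using the scattered ghost values ls), the code forms

       x_i <- (1-omega) x_i + omega (b_i - sum_{j!=i} a_ij x_j) / (a_ii + fshift)

   which is the standard SOR relaxation restricted to the local rows.
*/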
#undef __FUNC__
#define __FUNC__ "MatRelax_MPIAIJ"
int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                    double fshift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
  Scalar     *b,*x,*xs,*ls,d,*v,sum;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i,shift = A->indexshift;

  PetscFunctionBegin;
  if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA);CHKERRQ(ierr);}
  diag = A->diag;
  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    if (xx != bb) {
      ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
    } else {
      b = x;
    }
    ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
    xs = x + shift; /* shift by one for index start of 1 */
    ls = ls + shift;
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n   = A->i[i+1] - A->i[i];
        PLogFlops(4*n+3);
        idx = A->j + A->i[i] + shift;
        v   = A->a + A->i[i] + shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n   = A->i[i+1] - A->i[i];
        PLogFlops(4*n+3);
        idx = A->j + A->i[i] + shift;
        v   = A->a + A->i[i] + shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
    ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);}
    ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    if (xx != bb) {
      ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
    } else {
      b = x;
    }
    ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
    xs = x + shift; /* shift by one for index start of 1 */
    ls = ls + shift;
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n   = A->i[i+1] - A->i[i];
        PLogFlops(4*n+3);
        idx = A->j + A->i[i] + shift;
        v   = A->a + A->i[i] + shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
    ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);}
    ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    if (xx != bb) {
      ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
    } else {
      b = x;
    }
    ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr);
    xs = x + shift; /* shift by one for index start of 1 */
    ls = ls + shift;
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n   = A->i[i+1] - A->i[i];
        PLogFlops(4*n+3);
        idx = A->j + A->i[i] + shift;
        v   = A->a + A->i[i] + shift;
        sum = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d   = fshift + A->a[diag[i]+shift];
        n   = B->i[i+1] - B->i[i];
        idx = B->j + B->i[i] + shift;
        v   = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
    ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    if (bb != xx) {ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);}
    ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIAIJ"
int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        A = mat->A, B = mat->B;
  int        ierr;
  double     isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)mat->M;
  info->columns_global    = (double)mat->N;
  info->rows_local        = (double)mat->m;
  info->columns_local     = (double)mat->n;

  PetscFunctionReturn(0);
}
#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIAIJ"
int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_COLUMNS_UNSORTED ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
      op == MAT_NEW_NONZERO_LOCATION_ERR) {
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS) {
    PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
  } else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
  } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
    a->donotstash = 1;
  } else if (op == MAT_NO_NEW_DIAGONALS){
    SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIAIJ"
int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->M;
  if (n) *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIAIJ"
int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->m;
  if (n) *n = mat->n;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;

  PetscFunctionBegin;
  *m = mat->rstart; *n = mat->rend;
  PetscFunctionReturn(0);
}
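/*
   MatGetRow_MPIAIJ() returns a local row as the merge of the corresponding
   rows of the diagonal block A and the off-diagonal block B, with the
   compressed B column numbers translated back to global numbering through
   garray, so the caller sees a single row with increasing global column
   indices.
*/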
#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIAIJ"
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
  int        *cmap, *idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
       allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
    int        max = 1,m = mat->m,tmp;
    for ( i=0; i<m; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    mat->rowvalues  = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B are already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIAIJ"
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow() has not been called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatNorm_MPIAIJ"
int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscReal(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) );CHKPTRQ(tmp2);
      ierr = PetscMemzero(tmp,aij->N*sizeof(double));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v   = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for the two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIAIJ"
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat        B;
  Scalar     *array;

  PetscFunctionBegin;
  if (matout == PETSC_NULL && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"Supports square matrix only for in-place transpose");
  }

  ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) );CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  ierr = PetscFree(ct);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    PetscOps       *Abops;
    struct _MatOps *Aops;

    /* This isn't really an in-place transpose, but we free the data structures from a */
    ierr = PetscFree(a->rowners);CHKERRQ(ierr);
    ierr = MatDestroy(a->A);CHKERRQ(ierr);
    ierr = MatDestroy(a->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
    if (a->colmap) {ierr = PetscTableDelete(a->colmap);CHKERRQ(ierr);}
#else
    if (a->colmap) {ierr = PetscFree(a->colmap);CHKERRQ(ierr);}
#endif
    if (a->garray) {ierr = PetscFree(a->garray);CHKERRQ(ierr);}
    if (a->lvec)   {ierr = VecDestroy(a->lvec);CHKERRQ(ierr);}
    if (a->Mvctx)  {ierr = VecScatterDestroy(a->Mvctx);CHKERRQ(ierr);}
    ierr = PetscFree(a);CHKERRQ(ierr);

    /*
       This is horrible, horrible code. We need to keep the
       A pointers for the bops and ops but copy everything
       else from B.
    */
    Abops = A->bops;
    Aops  = A->ops;
    ierr  = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
    A->bops = Abops;
    A->ops  = Aops;
    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDiagonalScale_MPIAIJ"
int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat        a = aij->A, b = aij->B;
  int        ierr,s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector has non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector has non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatPrintHelp_MPIAIJ"
int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
  int        ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIAIJ"
int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetUnfactored_MPIAIJ"
int MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatEqual_MPIAIJ"
int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
  Mat        a, b, c, d;
  PetscTruth flg;
  int        ierr;

  PetscFunctionBegin;
  if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a, c, &flg);CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b, d, &flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatCopy_MPIAIJ"
int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
       and then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
extern int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
extern int MatGetSubMatrices_MPIAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
extern int MatGetSubMatrix_MPIAIJ(Mat,IS,IS,int,MatReuse,Mat *);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
       MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,
       MatMultTransAdd_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
       0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
       MatZeroRows_MPIAIJ,
       0,
       0,
       0,
       0,
       MatGetSize_MPIAIJ,
       MatGetLocalSize_MPIAIJ,
       MatGetOwnershipRange_MPIAIJ,
       0,
       0,
       0,
       0,
       MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,
       0,
       0,
       0,
       MatGetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
       MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       0,
       0,
       MatGetSubMatrix_MPIAIJ,
       0,
       0,
       MatGetMaps_Petsc};

/* ----------------------------------------------------------------------------------------*/

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatStoreValues_MPIAIJ"
int MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ "MatRetrieveValues_MPIAIJ"
int MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#include "pc.h"
EXTERN_C_BEGIN
extern int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
EXTERN_C_END

#undef __FUNC__
#define __FUNC__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters 
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given).
       This value should be the same as the local size used in creating the 
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given).
       This value should be the same as the local size used in creating the 
       x vector for the matrix-vector product y = Ax. For square matrices n is
       almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  d_nz - number of nonzeros per row in the DIAGONAL portion of the local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the 
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or PETSC_NULL, if d_nz is used to specify the nonzero structure. 
           The size of this array is equal to the number of local rows, i.e. 'm'.
           You must leave room for the diagonal entry even if it is zero.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of the local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or PETSC_NULL, if o_nz is used to specify the nonzero 
           structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.

   Output Parameter:
.  A - the matrix 

   Notes:
   The m,n,M,N parameters specify the size of the matrix and its partitioning
   across processors, while d_nz,d_nnz,o_nz,o_nnz specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 
   processor, then it must be used on all processors that share the object for 
   that argument.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned such that the first m0 rows belong to 
   process 0, the next m1 rows belong to process 1, the next m2 rows belong 
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix obtained by extracting the part corresponding to the rows
   r1-r2 and columns r1-r2 of the global matrix, where r1 is the first row
   that belongs to the processor and r2 is the last row belonging to this
   processor. This is a square mxm matrix. The remaining portion of the
   local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
.  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0. Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!
-  -mat_mpi - use the parallel matrix data structures even on one processor
        (defaults to using SeqAIJ format on one processor)

   Example usage:

   Consider the following 8x8 matrix with 34 nonzero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are the
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 stores [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros
   over the local rows of each of the local DIAGONAL and OFF-DIAGONAL
   submatrices. In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are then allocating m*(d_nz+o_nz) storage locations on every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL
   submatrices. In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e. 34,
   and hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat        B;
  Mat_MPIAIJ *b;
  int        ierr,i,size;
  PetscTruth flag1,flag2;

  PetscFunctionBegin;

  if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -2: value %d",d_nz);
  if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -2: value %d",o_nz);
  if (d_nnz) {
    for ( i=0; i<m; i++ ) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for ( i=0; i<m; i++ ) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
    }
  }

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1);CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
  if (!flag1 && !flag2 && size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data         = (void *) (b = PetscNew(Mat_MPIAIJ));CHKPTRQ(b);
  ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
  ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->ops->destroy = MatDestroy_MPIAIJ;
  B->ops->view    = MatView_MPIAIJ;
  B->factor       = 0;
  B->assembled    = PETSC_FALSE;
  B->mapping      = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }

  ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
  ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  /* the information in the maps duplicates the information computed below; eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
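  /* rowners/cowners are filled below by an MPI_Allgather() of each process's
     local size, followed by an in-place prefix sum, so that rowners[p] holds
     the first global row owned by process p; e.g. with the docstring's
     m = {3,3,2} this gives rowners = {0,3,6,8} */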
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off-processor entries formed */
  ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used for matrix-vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                           "MatStoreValues_MPIAIJ",
                                           (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                           "MatRetrieveValues_MPIAIJ",
                                           (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                           "MatGetDiagonalBlock_MPIAIJ",
                                           (void*)MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  *A = B;
  PetscFunctionReturn(0);
}

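/*
   Usage sketch (illustrative only, not part of this file): creating the 8x8
   example matrix from the documentation above on 3 processes with exact
   per-row preallocation. All variable names here are hypothetical.

      Mat A;
      int rank,m,n,*dnnz,*onnz,ierr;
      int d0[] = {2,2,2}, o0[] = {2,2,2};        (proc0)
      int d1[] = {3,3,2}, o1[] = {2,1,1};        (proc1)
      int d2[] = {1,1},   o2[] = {4,4};          (proc2)

      ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
      if (rank == 0)      {m = 3; n = 3; dnnz = d0; onnz = o0;}
      else if (rank == 1) {m = 3; n = 3; dnnz = d1; onnz = o1;}
      else                {m = 2; n = 2; dnnz = d2; onnz = o2;}
      ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,m,n,8,8,0,dnnz,0,onnz,&A);CHKERRQ(ierr);
          ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...
      ierr = MatDestroy(A);CHKERRQ(ierr);
*/
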
#undef __FUNC__
#define __FUNC__ "MatDuplicate_MPIAIJ"
int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr,len = 0;
  PetscTruth flg;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void *) (a = PetscNew(Mat_MPIAIJ));CHKPTRQ(a);
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->ops->destroy = MatDestroy_MPIAIJ;
  mat->ops->view    = MatView_MPIAIJ;
  mat->factor       = matin->factor;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 2;
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
    a->garray = (int *) PetscMalloc((len+1)*sizeof(int));CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat);CHKERRQ(ierr);
  }
  ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "sys.h"

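/*
   On-disk layout assumed by MatLoad_MPIAIJ() below, as implied by the
   PetscBinaryRead() calls: an integer header {MAT_COOKIE, M, N, nz}
   (a negative nz marks a special format this routine cannot load),
   followed by the M row lengths, then all column indices, then all
   numerical values. Process 0 reads the file and ships each remaining
   process its piece.
*/
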
#undef __FUNC__
#define __FUNC__ "MatLoad_MPIAIJ"
int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat        A;
  Scalar     *vals,*svals;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        i,nz,ierr,j,rstart,rend,fd;
  int        header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int        *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  int        tag = ((PetscObject)viewer)->tag,cend,cstart,n;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m       = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int));CHKPTRQ(rowners);
  ierr    = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) );CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) );CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) );CHKPTRQ(procsnz);
    ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j<rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) );CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);
    ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) );CHKPTRQ(mycols);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
  jj   = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix; MatCreateMPIAIJ() wants the diagonal lengths only,
     so subtract the off-diagonal counts and restore them afterwards */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A    = *newmat;
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

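  /* The numerical values are distributed just as the column indices were:
     process 0 reads its own block directly from the file, then reads and
     sends each remaining process's block in rank order, while the other
     processes post a matching MPI_Recv(); every process then inserts its
     values one row at a time with MatSetValues() */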
  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) );CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors' parts and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) );CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree(ourlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ
  on each process, and then the end result obtained by concatenating the
  local matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
{
  int        ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  Mat        *local,M,Mreuse;
  Scalar     *vwork,*aa;
  MPI_Comm   comm = mat->comm;
  Mat_SeqAIJ *aij;
  int        *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *) (Mreuse)->data;
    if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
    ii = aij->i;
    jj = aij->j;

    /*
        Determine the number of nonzeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      nlocal = n/size + ((n % size) > rank);
    } else {
      nlocal = csize;
    }
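    /* MPI_Scan() computes the inclusive prefix sum of nlocal across the
       communicator, so rend is one past this process's last "diagonal"
       column and rstart = rend - nlocal; e.g. nlocal = {3,3,2} on three
       processes gives rend = {3,6,8} */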
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
    }

    /* next, compute all the lengths */
    dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
    olens = dlens + m;
    for ( i=0; i<m; i++ ) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for ( j=0; j<jend; j++ ) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    int ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
        rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ *) (Mreuse)->data;
  if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
  ii = aij->i;
  jj = aij->j;
  aa = aij->a;
  for ( i=0; i<m; i++ ) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
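
/*
   Usage sketch (illustrative only, not part of this file): the "SubMatrix"
   object composed above lets a later extraction with the same index sets
   reuse all of the setup from the first call. Assuming hypothetical mat,
   isrow, and iscol:

      Mat sub;
      ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
          ... change the numerical values of mat, keeping its nonzero pattern ...
      ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
*/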