1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: mpiaij.c,v 1.269 1998/12/23 19:31:15 bsmith Exp bsmith $"; 3 #endif 4 5 #include "src/mat/impls/aij/mpi/mpiaij.h" 6 #include "src/vec/vecimpl.h" 7 #include "src/inline/spops.h" 8 9 /* local utility routine that creates a mapping from the global column 10 number to the local number in the off-diagonal part of the local 11 storage of the matrix. This is done in a non scable way since the 12 length of colmap equals the global matrix length. 13 */ 14 #undef __FUNC__ 15 #define __FUNC__ "CreateColmap_MPIAIJ_Private" 16 int CreateColmap_MPIAIJ_Private(Mat mat) 17 { 18 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 19 Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 20 int n = B->n,i; 21 22 PetscFunctionBegin; 23 aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap); 24 PLogObjectMemory(mat,aij->N*sizeof(int)); 25 PetscMemzero(aij->colmap,aij->N*sizeof(int)); 26 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 27 PetscFunctionReturn(0); 28 } 29 30 extern int DisAssemble_MPIAIJ(Mat); 31 32 #define CHUNKSIZE 15 33 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 34 { \ 35 \ 36 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 37 rmax = aimax[row]; nrow = ailen[row]; \ 38 col1 = col - shift; \ 39 \ 40 low = 0; high = nrow; \ 41 while (high-low > 5) { \ 42 t = (low+high)/2; \ 43 if (rp[t] > col) high = t; \ 44 else low = t; \ 45 } \ 46 for ( _i=0; _i<nrow; _i++ ) { \ 47 if (rp[_i] > col1) break; \ 48 if (rp[_i] == col1) { \ 49 if (addv == ADD_VALUES) ap[_i] += value; \ 50 else ap[_i] = value; \ 51 goto a_noinsert; \ 52 } \ 53 } \ 54 if (nonew == 1) goto a_noinsert; \ 55 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 56 if (nrow >= rmax) { \ 57 /* there is no extra room in row, therefore enlarge */ \ 58 int new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \ 59 Scalar *new_a; \ 60 \ 61 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 62 \ 63 /* malloc new storage space */ \ 64 len = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \ 65 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 66 new_j = (int *) (new_a + new_nz); \ 67 new_i = new_j + new_nz; \ 68 \ 69 /* copy over old data into new slots */ \ 70 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \ 71 for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 72 PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \ 73 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 74 PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 75 len*sizeof(int)); \ 76 PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \ 77 PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 78 len*sizeof(Scalar)); \ 79 /* free up old matrix storage */ \ 80 \ 81 PetscFree(a->a); \ 82 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 83 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 84 a->singlemalloc = 1; \ 85 \ 86 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 87 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 88 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 89 a->maxnz += CHUNKSIZE; \ 90 a->reallocs++; \ 91 } \ 92 N = nrow++ - 1; a->nz++; \ 93 /* shift up all the later entries in this row */ \ 94 for ( ii=N; ii>=_i; ii-- ) { \ 95 rp[ii+1] = rp[ii]; \ 96 ap[ii+1] = ap[ii]; \ 97 } \ 98 rp[_i] = col1; \ 99 ap[_i] = value; \ 100 a_noinsert: ; \ 101 ailen[row] = nrow; \ 102 } 103 104 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 105 { \ 106 \ 107 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 108 rmax = bimax[row]; nrow = bilen[row]; \ 109 col1 = col - shift; \ 110 \ 111 low = 0; high = nrow; \ 112 while (high-low > 5) { \ 113 t = (low+high)/2; \ 114 if (rp[t] > col) high = t; \ 115 else low = t; \ 116 } \ 117 for ( _i=0; _i<nrow; _i++ ) { \ 118 if (rp[_i] > col1) break; \ 119 if (rp[_i] == col1) { \ 120 if (addv == ADD_VALUES) ap[_i] += value; \ 121 else ap[_i] = value; \ 122 goto b_noinsert; \ 123 } \ 124 } \ 125 if (nonew == 1) goto b_noinsert; \ 126 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 127 if (nrow >= rmax) { \ 128 /* there is no extra room in row, therefore enlarge */ \ 129 int new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \ 130 Scalar *new_a; \ 131 \ 132 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 133 \ 134 /* malloc new storage space */ \ 135 len = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \ 136 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 137 new_j = (int *) (new_a + new_nz); \ 138 new_i = new_j + new_nz; \ 139 \ 140 /* copy over old data into new slots */ \ 141 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \ 142 for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 143 PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \ 144 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 145 PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 146 len*sizeof(int)); \ 147 PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \ 148 PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 149 len*sizeof(Scalar)); \ 150 /* free up old matrix storage */ \ 151 \ 152 PetscFree(b->a); \ 153 if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 154 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 155 b->singlemalloc = 1; \ 156 \ 157 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 158 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 159 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 160 b->maxnz += CHUNKSIZE; \ 161 b->reallocs++; \ 162 } \ 163 N = nrow++ - 1; b->nz++; \ 164 /* shift up all the later entries in this row */ \ 165 for ( ii=N; ii>=_i; ii-- ) { \ 166 rp[ii+1] = rp[ii]; \ 167 ap[ii+1] = ap[ii]; \ 168 } \ 169 rp[_i] = col1; \ 170 ap[_i] = value; \ 171 b_noinsert: ; \ 172 bilen[row] = nrow; \ 173 } 174 175 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 176 #undef __FUNC__ 177 #define __FUNC__ "MatSetValues_MPIAIJ" 178 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 179 { 180 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 181 Scalar value; 182 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 183 int cstart = aij->cstart, cend = aij->cend,row,col; 184 int roworiented = aij->roworiented; 185 186 /* Some Variables required in the macro */ 187 Mat A = aij->A; 188 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 189 int *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j; 190 Scalar *aa = a->a; 191 192 Mat B = aij->B; 193 Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data; 194 int *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j; 195 Scalar *ba = b->a; 196 197 int *rp,ii,nrow,_i,rmax, N, col1,low,high,t; 198 int nonew = a->nonew,shift = a->indexshift; 199 Scalar *ap; 200 201 PetscFunctionBegin; 202 for ( i=0; i<m; i++ ) { 203 #if defined(USE_PETSC_BOPT_g) 204 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 205 if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 206 #endif 207 if (im[i] >= rstart && im[i] < rend) { 208 row = im[i] - rstart; 209 for ( j=0; j<n; j++ ) { 210 if (in[j] >= cstart && in[j] < cend){ 211 col = in[j] - cstart; 212 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 213 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 214 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 215 } 216 #if defined(USE_PETSC_BOPT_g) 217 else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 218 else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");} 219 #endif 220 else { 221 if (mat->was_assembled) { 222 if (!aij->colmap) { 223 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 224 } 225 col = aij->colmap[in[j]] - 1; 226 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 227 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 228 col = in[j]; 229 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 230 B = aij->B; 231 b = (Mat_SeqAIJ *) B->data; 232 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 233 ba = b->a; 234 } 235 } else col = in[j]; 236 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 237 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 238 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 239 } 240 } 241 } 242 else { 243 if (roworiented && !aij->donotstash) { 244 ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 245 } 246 else { 247 if (!aij->donotstash) { 248 row = im[i]; 249 for ( j=0; j<n; j++ ) { 250 ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 251 } 252 } 253 } 254 } 255 } 256 PetscFunctionReturn(0); 257 } 258 259 #undef __FUNC__ 260 #define __FUNC__ "MatGetValues_MPIAIJ" 261 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 262 { 263 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 264 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 265 int cstart = aij->cstart, cend = aij->cend,row,col; 266 267 PetscFunctionBegin; 268 for ( i=0; i<m; i++ ) { 269 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 270 if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 271 if (idxm[i] >= rstart && idxm[i] < rend) { 272 row = idxm[i] - rstart; 273 for ( j=0; j<n; j++ ) { 274 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 275 if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 276 if (idxn[j] >= cstart && idxn[j] < cend){ 277 col = idxn[j] - cstart; 278 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 279 } else { 280 if (!aij->colmap) { 281 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 282 } 283 col = aij->colmap[idxn[j]] - 1; 284 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 285 else { 286 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 287 } 288 } 289 } 290 } else { 291 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 292 } 293 } 294 PetscFunctionReturn(0); 295 } 296 297 #undef __FUNC__ 298 #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 299 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 300 { 301 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 302 MPI_Comm comm = mat->comm; 303 int size = aij->size, *owners = aij->rowners; 304 int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 305 MPI_Request *send_waits,*recv_waits; 306 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 307 InsertMode addv; 308 Scalar *rvalues,*svalues; 309 310 PetscFunctionBegin; 311 if (aij->donotstash) { 312 aij->svalues = 0; aij->rvalues = 0; 313 aij->nsends = 0; aij->nrecvs = 0; 314 aij->send_waits = 0; aij->recv_waits = 0; 315 aij->rmax = 0; 316 PetscFunctionReturn(0); 317 } 318 319 /* make sure all processors are either in INSERTMODE or ADDMODE */ 320 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 321 if (addv == (ADD_VALUES|INSERT_VALUES)) { 322 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 323 } 324 mat->insertmode = addv; /* in case this processor had no cache */ 325 326 /* first count number of contributors to each processor */ 327 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 328 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 329 owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 330 for ( i=0; i<aij->stash.n; i++ ) { 331 idx = aij->stash.idx[i]; 332 for ( j=0; j<size; j++ ) { 333 if (idx >= owners[j] && idx < owners[j+1]) { 334 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 335 } 336 } 337 } 338 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 339 340 /* inform other processors of number of messages and max length*/ 341 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 342 ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 343 nreceives = work[rank]; 344 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 345 nmax = work[rank]; 346 PetscFree(work); 347 348 /* post receives: 349 1) each message will consist of ordered pairs 350 (global index,value) we store the global index as a double 351 to simplify the message passing. 352 2) since we don't know how long each individual message is we 353 allocate the largest needed buffer for each receive. Potentially 354 this is a lot of wasted space. 355 356 357 This could be done better. 358 */ 359 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues); 360 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 361 for ( i=0; i<nreceives; i++ ) { 362 ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 363 comm,recv_waits+i);CHKERRQ(ierr); 364 } 365 366 /* do sends: 367 1) starts[i] gives the starting index in svalues for stuff going to 368 the ith processor 369 */ 370 svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 371 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 372 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 373 starts[0] = 0; 374 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 375 for ( i=0; i<aij->stash.n; i++ ) { 376 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 377 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 378 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 379 } 380 PetscFree(owner); 381 starts[0] = 0; 382 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 383 count = 0; 384 for ( i=0; i<size; i++ ) { 385 if (procs[i]) { 386 ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 387 } 388 } 389 PetscFree(starts); 390 PetscFree(nprocs); 391 392 /* Free cache space */ 393 PLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n); 394 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 395 396 aij->svalues = svalues; aij->rvalues = rvalues; 397 aij->nsends = nsends; aij->nrecvs = nreceives; 398 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 399 aij->rmax = nmax; 400 401 PetscFunctionReturn(0); 402 } 403 extern int MatSetUpMultiply_MPIAIJ(Mat); 404 405 #undef __FUNC__ 406 #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 407 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 408 { 409 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 410 MPI_Status *send_status,recv_status; 411 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 412 int row,col,other_disassembled; 413 Scalar *values,val; 414 InsertMode addv = mat->insertmode; 415 416 PetscFunctionBegin; 417 418 /* wait on receives */ 419 while (count) { 420 ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 421 /* unpack receives into our local space */ 422 values = aij->rvalues + 3*imdex*aij->rmax; 423 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr); 424 n = n/3; 425 for ( i=0; i<n; i++ ) { 426 row = (int) PetscReal(values[3*i]) - aij->rstart; 427 col = (int) PetscReal(values[3*i+1]); 428 val = values[3*i+2]; 429 if (col >= aij->cstart && col < aij->cend) { 430 col -= aij->cstart; 431 ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 432 } else { 433 if (mat->was_assembled || mat->assembled) { 434 if (!aij->colmap) { 435 ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr); 436 } 437 col = aij->colmap[col] - 1; 438 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 439 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 440 col = (int) PetscReal(values[3*i+1]); 441 } 442 } 443 ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 444 } 445 } 446 count--; 447 } 448 if (aij->recv_waits) PetscFree(aij->recv_waits); 449 if (aij->rvalues) PetscFree(aij->rvalues); 450 451 /* wait on sends */ 452 if (aij->nsends) { 453 send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 454 ierr = MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr); 455 PetscFree(send_status); 456 } 457 if (aij->send_waits) PetscFree(aij->send_waits); 458 if (aij->svalues) PetscFree(aij->svalues); 459 460 ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 461 ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 462 463 /* determine if any processor has disassembled, if so we must 464 also disassemble ourselfs, in order that we may reassemble. */ 465 /* 466 if nonzero structure of submatrix B cannot change then we know that 467 no processor disassembled thus we can skip this stuff 468 */ 469 if (!((Mat_SeqAIJ*) aij->B->data)->nonew) { 470 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 471 if (mat->was_assembled && !other_disassembled) { 472 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 473 } 474 } 475 476 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 477 ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 478 } 479 ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 480 ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 481 482 if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 483 PetscFunctionReturn(0); 484 } 485 486 #undef __FUNC__ 487 #define __FUNC__ "MatZeroEntries_MPIAIJ" 488 int MatZeroEntries_MPIAIJ(Mat A) 489 { 490 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 491 int ierr; 492 493 PetscFunctionBegin; 494 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 495 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 496 PetscFunctionReturn(0); 497 } 498 499 #undef __FUNC__ 500 #define __FUNC__ "MatZeroRows_MPIAIJ" 501 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 502 { 503 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 504 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 505 int *procs,*nprocs,j,found,idx,nsends,*work,row; 506 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 507 int *rvalues,tag = A->tag,count,base,slen,n,*source; 508 int *lens,imdex,*lrows,*values,rstart=l->rstart; 509 MPI_Comm comm = A->comm; 510 MPI_Request *send_waits,*recv_waits; 511 MPI_Status recv_status,*send_status; 512 IS istmp; 513 PetscTruth localdiag; 514 515 PetscFunctionBegin; 516 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 517 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 518 519 /* first count number of contributors to each processor */ 520 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 521 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 522 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 523 for ( i=0; i<N; i++ ) { 524 idx = rows[i]; 525 found = 0; 526 for ( j=0; j<size; j++ ) { 527 if (idx >= owners[j] && idx < owners[j+1]) { 528 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 529 } 530 } 531 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 532 } 533 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 534 535 /* inform other processors of number of messages and max length*/ 536 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 537 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 538 nrecvs = work[rank]; 539 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 540 nmax = work[rank]; 541 PetscFree(work); 542 543 /* post receives: */ 544 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 545 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 546 for ( i=0; i<nrecvs; i++ ) { 547 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 548 } 549 550 /* do sends: 551 1) starts[i] gives the starting index in svalues for stuff going to 552 the ith processor 553 */ 554 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 555 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 556 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 557 starts[0] = 0; 558 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 559 for ( i=0; i<N; i++ ) { 560 svalues[starts[owner[i]]++] = rows[i]; 561 } 562 ISRestoreIndices(is,&rows); 563 564 starts[0] = 0; 565 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 566 count = 0; 567 for ( i=0; i<size; i++ ) { 568 if (procs[i]) { 569 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 570 } 571 } 572 PetscFree(starts); 573 574 base = owners[rank]; 575 576 /* wait on receives */ 577 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 578 source = lens + nrecvs; 579 count = nrecvs; slen = 0; 580 while (count) { 581 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 582 /* unpack receives into our local space */ 583 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 584 source[imdex] = recv_status.MPI_SOURCE; 585 lens[imdex] = n; 586 slen += n; 587 count--; 588 } 589 PetscFree(recv_waits); 590 591 /* move the data into the send scatter */ 592 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 593 count = 0; 594 for ( i=0; i<nrecvs; i++ ) { 595 values = rvalues + i*nmax; 596 for ( j=0; j<lens[i]; j++ ) { 597 lrows[count++] = values[j] - base; 598 } 599 } 600 PetscFree(rvalues); PetscFree(lens); 601 PetscFree(owner); PetscFree(nprocs); 602 603 /* actually zap the local rows */ 604 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 605 PLogObjectParent(A,istmp); 606 607 /* 608 Zero the required rows. If the "diagonal block" of the matrix 609 is square and the user wishes to set the diagonal we use seperate 610 code so that MatSetValues() is not called for each diagonal allocating 611 new memory, thus calling lots of mallocs and slowing things down. 612 613 Contributed by: Mathew Knepley 614 */ 615 localdiag = PETSC_FALSE; 616 if (diag && (l->A->M == l->A->N)) { 617 localdiag = PETSC_TRUE; 618 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 619 } else { 620 ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 621 } 622 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 623 ierr = ISDestroy(istmp); CHKERRQ(ierr); 624 625 if (diag && (localdiag == PETSC_FALSE)) { 626 for ( i = 0; i < slen; i++ ) { 627 row = lrows[i] + rstart; 628 MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); 629 } 630 MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); 631 MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); 632 } 633 634 if (diag) { 635 for ( i = 0; i < slen; i++ ) { 636 row = lrows[i] + rstart; 637 MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); 638 } 639 MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); 640 MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); 641 } 642 PetscFree(lrows); 643 644 /* wait on sends */ 645 if (nsends) { 646 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 647 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 648 PetscFree(send_status); 649 } 650 PetscFree(send_waits); PetscFree(svalues); 651 652 PetscFunctionReturn(0); 653 } 654 655 #undef __FUNC__ 656 #define __FUNC__ "MatMult_MPIAIJ" 657 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 658 { 659 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 660 int ierr,nt; 661 662 PetscFunctionBegin; 663 ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 664 if (nt != a->n) { 665 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 666 } 667 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 668 ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr); 669 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 670 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 671 PetscFunctionReturn(0); 672 } 673 674 #undef __FUNC__ 675 #define __FUNC__ "MatMultAdd_MPIAIJ" 676 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 677 { 678 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 679 int ierr; 680 681 PetscFunctionBegin; 682 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 683 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 684 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 685 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 686 PetscFunctionReturn(0); 687 } 688 689 #undef __FUNC__ 690 #define __FUNC__ "MatMultTrans_MPIAIJ" 691 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 692 { 693 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 694 int ierr; 695 696 PetscFunctionBegin; 697 /* do nondiagonal part */ 698 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 699 /* send it on its way */ 700 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 701 /* do local part */ 702 ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr); 703 /* receive remote parts: note this assumes the values are not actually */ 704 /* inserted in yy until the next line, which is true for my implementation*/ 705 /* but is not perhaps always true. */ 706 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 707 PetscFunctionReturn(0); 708 } 709 710 #undef __FUNC__ 711 #define __FUNC__ "MatMultTransAdd_MPIAIJ" 712 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 713 { 714 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 715 int ierr; 716 717 PetscFunctionBegin; 718 /* do nondiagonal part */ 719 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 720 /* send it on its way */ 721 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 722 /* do local part */ 723 ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 724 /* receive remote parts: note this assumes the values are not actually */ 725 /* inserted in yy until the next line, which is true for my implementation*/ 726 /* but is not perhaps always true. */ 727 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 728 PetscFunctionReturn(0); 729 } 730 731 /* 732 This only works correctly for square matrices where the subblock A->A is the 733 diagonal block 734 */ 735 #undef __FUNC__ 736 #define __FUNC__ "MatGetDiagonal_MPIAIJ" 737 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 738 { 739 int ierr; 740 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 741 742 PetscFunctionBegin; 743 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 744 if (a->rstart != a->cstart || a->rend != a->cend) { 745 SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition"); 746 } 747 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 748 PetscFunctionReturn(0); 749 } 750 751 #undef __FUNC__ 752 #define __FUNC__ "MatScale_MPIAIJ" 753 int MatScale_MPIAIJ(Scalar *aa,Mat A) 754 { 755 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 756 int ierr; 757 758 PetscFunctionBegin; 759 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 760 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 761 PetscFunctionReturn(0); 762 } 763 764 #undef __FUNC__ 765 #define __FUNC__ "MatDestroy_MPIAIJ" 766 int MatDestroy_MPIAIJ(Mat mat) 767 { 768 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 769 int ierr; 770 771 PetscFunctionBegin; 772 if (--mat->refct > 0) PetscFunctionReturn(0); 773 774 if (mat->mapping) { 775 ierr = ISLocalToGlobalMappingDestroy(mat->mapping); CHKERRQ(ierr); 776 } 777 if (mat->bmapping) { 778 ierr = ISLocalToGlobalMappingDestroy(mat->bmapping); CHKERRQ(ierr); 779 } 780 if (mat->rmap) { 781 ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 782 } 783 if (mat->cmap) { 784 ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 785 } 786 #if defined(USE_PETSC_LOG) 787 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N); 788 #endif 789 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 790 PetscFree(aij->rowners); 791 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 792 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 793 if (aij->colmap) PetscFree(aij->colmap); 794 if (aij->garray) PetscFree(aij->garray); 795 if (aij->lvec) VecDestroy(aij->lvec); 796 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 797 if (aij->rowvalues) PetscFree(aij->rowvalues); 798 PetscFree(aij); 799 PLogObjectDestroy(mat); 800 PetscHeaderDestroy(mat); 801 PetscFunctionReturn(0); 802 } 803 804 #undef __FUNC__ 805 #define __FUNC__ "MatView_MPIAIJ_Binary" 806 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 807 { 808 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 809 int ierr; 810 811 PetscFunctionBegin; 812 if (aij->size == 1) { 813 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 814 } 815 else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 816 PetscFunctionReturn(0); 817 } 818 819 #undef __FUNC__ 820 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" 821 extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 822 { 823 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 824 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 825 int ierr, format,shift = C->indexshift,rank = aij->rank ; 826 int size = aij->size; 827 FILE *fd; 828 ViewerType vtype; 829 830 PetscFunctionBegin; 831 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 832 if (PetscTypeCompare(vtype,ASCII_VIEWER)) { 833 ierr = ViewerGetFormat(viewer,&format); 834 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 835 MatInfo info; 836 int flg; 837 MPI_Comm_rank(mat->comm,&rank); 838 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 839 ierr = MatGetInfo(mat,MAT_LOCAL,&info); 840 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 841 PetscSequentialPhaseBegin(mat->comm,1); 842 if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 843 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 844 else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 845 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 846 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info); 847 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 848 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); 849 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 850 fflush(fd); 851 PetscSequentialPhaseEnd(mat->comm,1); 852 ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 853 PetscFunctionReturn(0); 854 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 855 PetscFunctionReturn(0); 856 } 857 } else if (PetscTypeCompare(vtype,DRAW_VIEWER)) { 858 Draw draw; 859 PetscTruth isnull; 860 ierr = ViewerDrawGetDraw(viewer,0,&draw); CHKERRQ(ierr); 861 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 862 } 863 864 if (size == 1) { 865 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 866 } else { 867 /* assemble the entire matrix onto first processor. */ 868 Mat A; 869 Mat_SeqAIJ *Aloc; 870 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 871 Scalar *a; 872 873 if (!rank) { 874 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 875 } else { 876 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 877 } 878 PLogObjectParent(mat,A); 879 880 /* copy over the A part */ 881 Aloc = (Mat_SeqAIJ*) aij->A->data; 882 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 883 row = aij->rstart; 884 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 885 for ( i=0; i<m; i++ ) { 886 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 887 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 888 } 889 aj = Aloc->j; 890 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 891 892 /* copy over the B part */ 893 Aloc = (Mat_SeqAIJ*) aij->B->data; 894 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 895 row = aij->rstart; 896 ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 897 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 898 for ( i=0; i<m; i++ ) { 899 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 900 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 901 } 902 PetscFree(ct); 903 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 904 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 905 /* 906 Everyone has to call to draw the matrix since the graphics waits are 907 synchronized across all processors that share the Draw object 908 */ 909 if (!rank || PetscTypeCompare(vtype,DRAW_VIEWER)) { 910 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 911 } 912 ierr = MatDestroy(A); CHKERRQ(ierr); 913 } 914 PetscFunctionReturn(0); 915 } 916 917 #undef __FUNC__ 918 #define __FUNC__ "MatView_MPIAIJ" 919 int MatView_MPIAIJ(Mat mat,Viewer viewer) 920 { 921 int ierr; 922 ViewerType vtype; 923 924 PetscFunctionBegin; 925 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 926 if (PetscTypeCompare(vtype,ASCII_VIEWER) || PetscTypeCompare(vtype,DRAW_VIEWER) || 927 PetscTypeCompare(vtype,MATLAB_VIEWER)) { 928 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 929 } else if (PetscTypeCompare(vtype,BINARY_VIEWER)) { 930 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 931 } else { 932 SETERRQ(1,1,"Viewer type not supported by PETSc object"); 933 } 934 PetscFunctionReturn(0); 935 } 936 937 /* 938 This has to provide several versions. 939 940 2) a) use only local smoothing updating outer values only once. 941 b) local smoothing updating outer values each inner iteration 942 3) color updating out values betwen colors. 943 */ 944 #undef __FUNC__ 945 #define __FUNC__ "MatRelax_MPIAIJ" 946 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 947 double fshift,int its,Vec xx) 948 { 949 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 950 Mat AA = mat->A, BB = mat->B; 951 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 952 Scalar *b,*x,*xs,*ls,d,*v,sum; 953 int ierr,*idx, *diag; 954 int n = mat->n, m = mat->m, i,shift = A->indexshift; 955 956 PetscFunctionBegin; 957 ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 958 ierr = VecGetArray(bb,&b); CHKERRQ(ierr); 959 ierr = VecGetArray(mat->lvec,&ls);CHKERRQ(ierr); 960 xs = x + shift; /* shift by one for index start of 1 */ 961 ls = ls + shift; 962 if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);} 963 diag = A->diag; 964 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 965 if (flag & SOR_ZERO_INITIAL_GUESS) { 966 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 967 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 968 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 969 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 970 PetscFunctionReturn(0); 971 } 972 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 973 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 974 while (its--) { 975 /* go down through the rows */ 976 for ( i=0; i<m; i++ ) { 977 n = A->i[i+1] - A->i[i]; 978 PLogFlops(4*n+3); 979 idx = A->j + A->i[i] + shift; 980 v = A->a + A->i[i] + shift; 981 sum = b[i]; 982 SPARSEDENSEMDOT(sum,xs,v,idx,n); 983 d = fshift + A->a[diag[i]+shift]; 984 n = B->i[i+1] - B->i[i]; 985 idx = B->j + B->i[i] + shift; 986 v = B->a + B->i[i] + shift; 987 SPARSEDENSEMDOT(sum,ls,v,idx,n); 988 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 989 } 990 /* come up through the rows */ 991 for ( i=m-1; i>-1; i-- ) { 992 n = A->i[i+1] - A->i[i]; 993 PLogFlops(4*n+3) 994 idx = A->j + A->i[i] + shift; 995 v = A->a + A->i[i] + shift; 996 sum = b[i]; 997 SPARSEDENSEMDOT(sum,xs,v,idx,n); 998 d = fshift + A->a[diag[i]+shift]; 999 n = B->i[i+1] - B->i[i]; 1000 idx = B->j + B->i[i] + shift; 1001 v = B->a + B->i[i] + shift; 1002 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1003 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1004 } 1005 } 1006 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1007 if (flag & SOR_ZERO_INITIAL_GUESS) { 1008 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 1009 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 1010 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 1011 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 1012 PetscFunctionReturn(0); 1013 } 1014 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1015 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1016 while (its--) { 1017 for ( i=0; i<m; i++ ) { 1018 n = A->i[i+1] - A->i[i]; 1019 PLogFlops(4*n+3); 1020 idx = A->j + A->i[i] + shift; 1021 v = A->a + A->i[i] + shift; 1022 sum = b[i]; 1023 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1024 d = fshift + A->a[diag[i]+shift]; 1025 n = B->i[i+1] - B->i[i]; 1026 idx = B->j + B->i[i] + shift; 1027 v = B->a + B->i[i] + shift; 1028 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1029 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1030 } 1031 } 1032 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1033 if (flag & SOR_ZERO_INITIAL_GUESS) { 1034 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 1035 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 1036 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 1037 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 1038 PetscFunctionReturn(0); 1039 } 1040 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1041 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 1042 while (its--) { 1043 for ( i=m-1; i>-1; i-- ) { 1044 n = A->i[i+1] - A->i[i]; 1045 PLogFlops(4*n+3); 1046 idx = A->j + A->i[i] + shift; 1047 v = A->a + A->i[i] + shift; 1048 sum = b[i]; 1049 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1050 d = fshift + A->a[diag[i]+shift]; 1051 n = B->i[i+1] - B->i[i]; 1052 idx = B->j + B->i[i] + shift; 1053 v = B->a + B->i[i] + shift; 1054 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1055 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1056 } 1057 } 1058 } else { 1059 SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported"); 1060 } 1061 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 1062 ierr = VecRestoreArray(bb,&b); CHKERRQ(ierr); 1063 ierr = VecRestoreArray(mat->lvec,&ls);CHKERRQ(ierr); 1064 PetscFunctionReturn(0); 1065 } 1066 1067 #undef __FUNC__ 1068 #define __FUNC__ "MatGetInfo_MPIAIJ" 1069 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1070 { 1071 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1072 Mat A = mat->A, B = mat->B; 1073 int ierr; 1074 double isend[5], irecv[5]; 1075 1076 PetscFunctionBegin; 1077 info->block_size = 1.0; 1078 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1079 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1080 isend[3] = info->memory; isend[4] = info->mallocs; 1081 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1082 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1083 isend[3] += info->memory; isend[4] += info->mallocs; 1084 if (flag == MAT_LOCAL) { 1085 info->nz_used = isend[0]; 1086 info->nz_allocated = isend[1]; 1087 info->nz_unneeded = isend[2]; 1088 info->memory = isend[3]; 1089 info->mallocs = isend[4]; 1090 } else if (flag == MAT_GLOBAL_MAX) { 1091 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1092 info->nz_used = irecv[0]; 1093 info->nz_allocated = irecv[1]; 1094 info->nz_unneeded = irecv[2]; 1095 info->memory = irecv[3]; 1096 info->mallocs = irecv[4]; 1097 } else if (flag == MAT_GLOBAL_SUM) { 1098 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1099 info->nz_used = irecv[0]; 1100 info->nz_allocated = irecv[1]; 1101 info->nz_unneeded = irecv[2]; 1102 info->memory = irecv[3]; 1103 info->mallocs = irecv[4]; 1104 } 1105 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1106 info->fill_ratio_needed = 0; 1107 info->factor_mallocs = 0; 1108 info->rows_global = (double)mat->M; 1109 info->columns_global = (double)mat->N; 1110 info->rows_local = (double)mat->m; 1111 info->columns_local = (double)mat->N; 1112 1113 PetscFunctionReturn(0); 1114 } 1115 1116 #undef __FUNC__ 1117 #define __FUNC__ "MatSetOption_MPIAIJ" 1118 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1119 { 1120 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1121 1122 PetscFunctionBegin; 1123 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1124 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1125 op == MAT_COLUMNS_UNSORTED || 1126 op == MAT_COLUMNS_SORTED || 1127 op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1128 op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1129 MatSetOption(a->A,op); 1130 MatSetOption(a->B,op); 1131 } else if (op == MAT_ROW_ORIENTED) { 1132 a->roworiented = 1; 1133 MatSetOption(a->A,op); 1134 MatSetOption(a->B,op); 1135 } else if (op == MAT_ROWS_SORTED || 1136 op == MAT_ROWS_UNSORTED || 1137 op == MAT_SYMMETRIC || 1138 op == MAT_STRUCTURALLY_SYMMETRIC || 1139 op == MAT_YES_NEW_DIAGONALS) 1140 PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1141 else if (op == MAT_COLUMN_ORIENTED) { 1142 a->roworiented = 0; 1143 MatSetOption(a->A,op); 1144 MatSetOption(a->B,op); 1145 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1146 a->donotstash = 1; 1147 } else if (op == MAT_NO_NEW_DIAGONALS){ 1148 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1149 } else { 1150 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1151 } 1152 PetscFunctionReturn(0); 1153 } 1154 1155 #undef __FUNC__ 1156 #define __FUNC__ "MatGetSize_MPIAIJ" 1157 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1158 { 1159 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1160 1161 PetscFunctionBegin; 1162 if (m) *m = mat->M; 1163 if (n) *n = mat->N; 1164 PetscFunctionReturn(0); 1165 } 1166 1167 #undef __FUNC__ 1168 #define __FUNC__ "MatGetLocalSize_MPIAIJ" 1169 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1170 { 1171 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1172 1173 PetscFunctionBegin; 1174 if (m) *m = mat->m; 1175 if (n) *n = mat->n; 1176 PetscFunctionReturn(0); 1177 } 1178 1179 #undef __FUNC__ 1180 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" 1181 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1182 { 1183 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1184 1185 PetscFunctionBegin; 1186 *m = mat->rstart; *n = mat->rend; 1187 PetscFunctionReturn(0); 1188 } 1189 1190 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1191 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1192 1193 #undef __FUNC__ 1194 #define __FUNC__ "MatGetRow_MPIAIJ" 1195 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1196 { 1197 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1198 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1199 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1200 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1201 int *cmap, *idx_p; 1202 1203 PetscFunctionBegin; 1204 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1205 mat->getrowactive = PETSC_TRUE; 1206 1207 if (!mat->rowvalues && (idx || v)) { 1208 /* 1209 allocate enough space to hold information from the longest row. 1210 */ 1211 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1212 int max = 1,m = mat->m,tmp; 1213 for ( i=0; i<m; i++ ) { 1214 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1215 if (max < tmp) { max = tmp; } 1216 } 1217 mat->rowvalues = (Scalar *) PetscMalloc(max*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 1218 mat->rowindices = (int *) (mat->rowvalues + max); 1219 } 1220 1221 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows") 1222 lrow = row - rstart; 1223 1224 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1225 if (!v) {pvA = 0; pvB = 0;} 1226 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1227 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1228 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1229 nztot = nzA + nzB; 1230 1231 cmap = mat->garray; 1232 if (v || idx) { 1233 if (nztot) { 1234 /* Sort by increasing column numbers, assuming A and B already sorted */ 1235 int imark = -1; 1236 if (v) { 1237 *v = v_p = mat->rowvalues; 1238 for ( i=0; i<nzB; i++ ) { 1239 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1240 else break; 1241 } 1242 imark = i; 1243 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1244 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1245 } 1246 if (idx) { 1247 *idx = idx_p = mat->rowindices; 1248 if (imark > -1) { 1249 for ( i=0; i<imark; i++ ) { 1250 idx_p[i] = cmap[cworkB[i]]; 1251 } 1252 } else { 1253 for ( i=0; i<nzB; i++ ) { 1254 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1255 else break; 1256 } 1257 imark = i; 1258 } 1259 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 1260 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 1261 } 1262 } else { 1263 if (idx) *idx = 0; 1264 if (v) *v = 0; 1265 } 1266 } 1267 *nz = nztot; 1268 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1269 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1270 PetscFunctionReturn(0); 1271 } 1272 1273 #undef __FUNC__ 1274 #define __FUNC__ "MatRestoreRow_MPIAIJ" 1275 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1276 { 1277 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1278 1279 PetscFunctionBegin; 1280 if (aij->getrowactive == PETSC_FALSE) { 1281 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1282 } 1283 aij->getrowactive = PETSC_FALSE; 1284 PetscFunctionReturn(0); 1285 } 1286 1287 #undef __FUNC__ 1288 #define __FUNC__ "MatNorm_MPIAIJ" 1289 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1290 { 1291 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1292 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1293 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1294 double sum = 0.0; 1295 Scalar *v; 1296 1297 PetscFunctionBegin; 1298 if (aij->size == 1) { 1299 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1300 } else { 1301 if (type == NORM_FROBENIUS) { 1302 v = amat->a; 1303 for (i=0; i<amat->nz; i++ ) { 1304 #if defined(USE_PETSC_COMPLEX) 1305 sum += PetscReal(PetscConj(*v)*(*v)); v++; 1306 #else 1307 sum += (*v)*(*v); v++; 1308 #endif 1309 } 1310 v = bmat->a; 1311 for (i=0; i<bmat->nz; i++ ) { 1312 #if defined(USE_PETSC_COMPLEX) 1313 sum += PetscReal(PetscConj(*v)*(*v)); v++; 1314 #else 1315 sum += (*v)*(*v); v++; 1316 #endif 1317 } 1318 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1319 *norm = sqrt(*norm); 1320 } else if (type == NORM_1) { /* max column norm */ 1321 double *tmp, *tmp2; 1322 int *jj, *garray = aij->garray; 1323 tmp = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp); 1324 tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2); 1325 PetscMemzero(tmp,aij->N*sizeof(double)); 1326 *norm = 0.0; 1327 v = amat->a; jj = amat->j; 1328 for ( j=0; j<amat->nz; j++ ) { 1329 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1330 } 1331 v = bmat->a; jj = bmat->j; 1332 for ( j=0; j<bmat->nz; j++ ) { 1333 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1334 } 1335 ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1336 for ( j=0; j<aij->N; j++ ) { 1337 if (tmp2[j] > *norm) *norm = tmp2[j]; 1338 } 1339 PetscFree(tmp); PetscFree(tmp2); 1340 } else if (type == NORM_INFINITY) { /* max row norm */ 1341 double ntemp = 0.0; 1342 for ( j=0; j<amat->m; j++ ) { 1343 v = amat->a + amat->i[j] + shift; 1344 sum = 0.0; 1345 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1346 sum += PetscAbsScalar(*v); v++; 1347 } 1348 v = bmat->a + bmat->i[j] + shift; 1349 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1350 sum += PetscAbsScalar(*v); v++; 1351 } 1352 if (sum > ntemp) ntemp = sum; 1353 } 1354 ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr); 1355 } else { 1356 SETERRQ(PETSC_ERR_SUP,0,"No support for two norm"); 1357 } 1358 } 1359 PetscFunctionReturn(0); 1360 } 1361 1362 #undef __FUNC__ 1363 #define __FUNC__ "MatTranspose_MPIAIJ" 1364 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1365 { 1366 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1367 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1368 int ierr,shift = Aloc->indexshift; 1369 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1370 Mat B; 1371 Scalar *array; 1372 1373 PetscFunctionBegin; 1374 if (matout == PETSC_NULL && M != N) { 1375 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1376 } 1377 1378 ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1379 1380 /* copy over the A part */ 1381 Aloc = (Mat_SeqAIJ*) a->A->data; 1382 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1383 row = a->rstart; 1384 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1385 for ( i=0; i<m; i++ ) { 1386 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1387 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1388 } 1389 aj = Aloc->j; 1390 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1391 1392 /* copy over the B part */ 1393 Aloc = (Mat_SeqAIJ*) a->B->data; 1394 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1395 row = a->rstart; 1396 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1397 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1398 for ( i=0; i<m; i++ ) { 1399 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1400 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1401 } 1402 PetscFree(ct); 1403 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1404 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1405 if (matout != PETSC_NULL) { 1406 *matout = B; 1407 } else { 1408 PetscOps *Abops; 1409 struct _MatOps *Aops; 1410 1411 /* This isn't really an in-place transpose .... but free data structures from a */ 1412 PetscFree(a->rowners); 1413 ierr = MatDestroy(a->A); CHKERRQ(ierr); 1414 ierr = MatDestroy(a->B); CHKERRQ(ierr); 1415 if (a->colmap) PetscFree(a->colmap); 1416 if (a->garray) PetscFree(a->garray); 1417 if (a->lvec) VecDestroy(a->lvec); 1418 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1419 PetscFree(a); 1420 1421 /* 1422 This is horrible, horrible code. We need to keep the 1423 A pointers for the bops and ops but copy everything 1424 else from C. 1425 */ 1426 Abops = A->bops; 1427 Aops = A->ops; 1428 PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1429 A->bops = Abops; 1430 A->ops = Aops; 1431 PetscHeaderDestroy(B); 1432 } 1433 PetscFunctionReturn(0); 1434 } 1435 1436 #undef __FUNC__ 1437 #define __FUNC__ "MatDiagonalScale_MPIAIJ" 1438 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1439 { 1440 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1441 Mat a = aij->A, b = aij->B; 1442 int ierr,s1,s2,s3; 1443 1444 PetscFunctionBegin; 1445 ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr); 1446 if (rr) { 1447 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1448 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 1449 /* Overlap communication with computation. */ 1450 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1451 } 1452 if (ll) { 1453 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1454 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1455 ierr = (*b->ops->diagonalscale)(b,ll,0); CHKERRQ(ierr); 1456 } 1457 /* scale the diagonal block */ 1458 ierr = (*a->ops->diagonalscale)(a,ll,rr); CHKERRQ(ierr); 1459 1460 if (rr) { 1461 /* Do a scatter end and then right scale the off-diagonal block */ 1462 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1463 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr); 1464 } 1465 1466 PetscFunctionReturn(0); 1467 } 1468 1469 1470 extern int MatPrintHelp_SeqAIJ(Mat); 1471 #undef __FUNC__ 1472 #define __FUNC__ "MatPrintHelp_MPIAIJ" 1473 int MatPrintHelp_MPIAIJ(Mat A) 1474 { 1475 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1476 int ierr; 1477 1478 PetscFunctionBegin; 1479 if (!a->rank) { 1480 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1481 } 1482 PetscFunctionReturn(0); 1483 } 1484 1485 #undef __FUNC__ 1486 #define __FUNC__ "MatGetBlockSize_MPIAIJ" 1487 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1488 { 1489 PetscFunctionBegin; 1490 *bs = 1; 1491 PetscFunctionReturn(0); 1492 } 1493 #undef __FUNC__ 1494 #define __FUNC__ "MatSetUnfactored_MPIAIJ" 1495 int MatSetUnfactored_MPIAIJ(Mat A) 1496 { 1497 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1498 int ierr; 1499 1500 PetscFunctionBegin; 1501 ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1502 PetscFunctionReturn(0); 1503 } 1504 1505 #undef __FUNC__ 1506 #define __FUNC__ "MatEqual_MPIAIJ" 1507 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag) 1508 { 1509 Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data; 1510 Mat a, b, c, d; 1511 PetscTruth flg; 1512 int ierr; 1513 1514 PetscFunctionBegin; 1515 if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1516 a = matA->A; b = matA->B; 1517 c = matB->A; d = matB->B; 1518 1519 ierr = MatEqual(a, c, &flg); CHKERRQ(ierr); 1520 if (flg == PETSC_TRUE) { 1521 ierr = MatEqual(b, d, &flg); CHKERRQ(ierr); 1522 } 1523 ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr); 1524 PetscFunctionReturn(0); 1525 } 1526 1527 #undef __FUNC__ 1528 #define __FUNC__ "MatCopy_MPIAIJ" 1529 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 1530 { 1531 int ierr; 1532 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1533 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 1534 1535 PetscFunctionBegin; 1536 if (str != SAME_NONZERO_PATTERN || B->type != MATMPIAIJ) { 1537 /* because of the column compression in the off-processor part of the matrix a->B, 1538 the number of columns in a->B and b->B may be different, hence we cannot call 1539 the MatCopy() directly on the two parts. If need be, we can provide a more 1540 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 1541 then copying the submatrices */ 1542 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1543 } else { 1544 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 1545 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 1546 } 1547 PetscFunctionReturn(0); 1548 } 1549 1550 extern int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *); 1551 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1552 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1553 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **); 1554 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatGetSubMatrixCall,Mat *); 1555 1556 /* -------------------------------------------------------------------*/ 1557 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 1558 MatGetRow_MPIAIJ, 1559 MatRestoreRow_MPIAIJ, 1560 MatMult_MPIAIJ, 1561 MatMultAdd_MPIAIJ, 1562 MatMultTrans_MPIAIJ, 1563 MatMultTransAdd_MPIAIJ, 1564 0, 1565 0, 1566 0, 1567 0, 1568 0, 1569 0, 1570 MatRelax_MPIAIJ, 1571 MatTranspose_MPIAIJ, 1572 MatGetInfo_MPIAIJ, 1573 MatEqual_MPIAIJ, 1574 MatGetDiagonal_MPIAIJ, 1575 MatDiagonalScale_MPIAIJ, 1576 MatNorm_MPIAIJ, 1577 MatAssemblyBegin_MPIAIJ, 1578 MatAssemblyEnd_MPIAIJ, 1579 0, 1580 MatSetOption_MPIAIJ, 1581 MatZeroEntries_MPIAIJ, 1582 MatZeroRows_MPIAIJ, 1583 0, 1584 0, 1585 0, 1586 0, 1587 MatGetSize_MPIAIJ, 1588 MatGetLocalSize_MPIAIJ, 1589 MatGetOwnershipRange_MPIAIJ, 1590 0, 1591 0, 1592 0, 1593 0, 1594 MatDuplicate_MPIAIJ, 1595 0, 1596 0, 1597 0, 1598 0, 1599 0, 1600 MatGetSubMatrices_MPIAIJ, 1601 MatIncreaseOverlap_MPIAIJ, 1602 MatGetValues_MPIAIJ, 1603 MatCopy_MPIAIJ, 1604 MatPrintHelp_MPIAIJ, 1605 MatScale_MPIAIJ, 1606 0, 1607 0, 1608 0, 1609 MatGetBlockSize_MPIAIJ, 1610 0, 1611 0, 1612 0, 1613 0, 1614 MatFDColoringCreate_MPIAIJ, 1615 0, 1616 MatSetUnfactored_MPIAIJ, 1617 0, 1618 0, 1619 MatGetSubMatrix_MPIAIJ, 1620 0, 1621 0, 1622 MatGetMaps_Petsc}; 1623 1624 /* ----------------------------------------------------------------------------------------*/ 1625 1626 EXTERN_C_BEGIN 1627 #undef __FUNC__ 1628 #define __FUNC__ "MatStoreValues_MPIAIJ" 1629 int MatStoreValues_MPIAIJ(Mat mat) 1630 { 1631 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1632 int ierr; 1633 1634 PetscFunctionBegin; 1635 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 1636 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 1637 PetscFunctionReturn(0); 1638 } 1639 EXTERN_C_END 1640 1641 EXTERN_C_BEGIN 1642 #undef __FUNC__ 1643 #define __FUNC__ "MatRetrieveValues_MPIAIJ" 1644 int MatRetrieveValues_MPIAIJ(Mat mat) 1645 { 1646 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1647 int ierr; 1648 1649 PetscFunctionBegin; 1650 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 1651 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 1652 PetscFunctionReturn(0); 1653 } 1654 EXTERN_C_END 1655 1656 #include "pc.h" 1657 EXTERN_C_BEGIN 1658 extern int PCSetUp_BJacobi_AIJ(PC); 1659 EXTERN_C_END 1660 1661 #undef __FUNC__ 1662 #define __FUNC__ "MatCreateMPIAIJ" 1663 /*@C 1664 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1665 (the default parallel PETSc format). For good matrix assembly performance 1666 the user should preallocate the matrix storage by setting the parameters 1667 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1668 performance can be increased by more than a factor of 50. 1669 1670 Collective on MPI_Comm 1671 1672 Input Parameters: 1673 + comm - MPI communicator 1674 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1675 This value should be the same as the local size used in creating the 1676 y vector for the matrix-vector product y = Ax. 1677 . n - This value should be the same as the local size used in creating the 1678 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 1679 calculated if N is given) For square matrices n is almost always m. 1680 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 1681 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 1682 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 1683 (same value is used for all local rows) 1684 . d_nnz - array containing the number of nonzeros in the various rows of the 1685 DIAGONAL portion of the local submatrix (possibly different for each row) 1686 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 1687 The size of this array is equal to the number of local rows, i.e 'm'. 1688 You must leave room for the diagonal entry even if it is zero. 1689 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 1690 submatrix (same value is used for all local rows). 1691 - o_nnz - array containing the number of nonzeros in the various rows of the 1692 OFF-DIAGONAL portion of the local submatrix (possibly different for 1693 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 1694 structure. The size of this array is equal to the number 1695 of local rows, i.e 'm'. 1696 1697 Output Parameter: 1698 . A - the matrix 1699 1700 Notes: 1701 m,n,M,N parameters specify the size of the matrix, and its partitioning across 1702 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 1703 storage requirements for this matrix. 1704 1705 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 1706 processor than it must be used on all processors that share the object for 1707 that argument. 1708 1709 The AIJ format (also called the Yale sparse matrix format or 1710 compressed row storage), is fully compatible with standard Fortran 77 1711 storage. That is, the stored row and column indices can begin at 1712 either one (as in Fortran) or zero. See the users manual for details. 1713 1714 The user MUST specify either the local or global matrix dimensions 1715 (possibly both). 1716 1717 The parallel matrix is partitioned such that the first m0 rows belong to 1718 process 0, the next m1 rows belong to process 1, the next m2 rows belong 1719 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 1720 1721 The DIAGONAL portion of the local submatrix of a processor can be defined 1722 as the submatrix which is obtained by extraction the part corresponding 1723 to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the 1724 first row that belongs to the processor, and r2 is the last row belonging 1725 to the this processor. This is a square mxm matrix. The remaining portion 1726 of the local submatrix (mxN) constitute the OFF-DIAGONAL portion. 1727 1728 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 1729 1730 By default, this format uses inodes (identical nodes) when possible. 1731 We search for consecutive rows with the same nonzero structure, thereby 1732 reusing matrix information to achieve increased efficiency. 1733 1734 Options Database Keys: 1735 + -mat_aij_no_inode - Do not use inodes 1736 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 1737 - -mat_aij_oneindex - Internally use indexing starting at 1 1738 rather than 0. Note that when calling MatSetValues(), 1739 the user still MUST index entries starting at 0! 1740 . -mat_mpi - use the parallel matrix data structures even on one processor 1741 (defaults to using SeqBAIJ format on one processor) 1742 . -mat_mpi - use the parallel matrix data structures even on one processor 1743 (defaults to using SeqAIJ format on one processor) 1744 1745 1746 Example usage: 1747 1748 Consider the following 8x8 matrix with 34 non-zero values, that is 1749 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 1750 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 1751 as follows: 1752 1753 .vb 1754 1 2 0 | 0 3 0 | 0 4 1755 Proc0 0 5 6 | 7 0 0 | 8 0 1756 9 0 10 | 11 0 0 | 12 0 1757 ------------------------------------- 1758 13 0 14 | 15 16 17 | 0 0 1759 Proc1 0 18 0 | 19 20 21 | 0 0 1760 0 0 0 | 22 23 0 | 24 0 1761 ------------------------------------- 1762 Proc2 25 26 27 | 0 0 28 | 29 0 1763 30 0 0 | 31 32 33 | 0 34 1764 .ve 1765 1766 This can be represented as a collection of submatrices as: 1767 1768 .vb 1769 A B C 1770 D E F 1771 G H I 1772 .ve 1773 1774 Where the submatrices A,B,C are owned by proc0, D,E,F are 1775 owned by proc1, G,H,I are owned by proc2. 1776 1777 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 1778 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 1779 The 'M','N' parameters are 8,8, and have the same values on all procs. 1780 1781 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 1782 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 1783 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 1784 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 1785 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 1786 matrix, ans [DF] as another SeqAIJ matrix. 1787 1788 When d_nz, o_nz parameters are specified, d_nz storage elements are 1789 allocated for every row of the local diagonal submatrix, and o_nz 1790 storage locations are allocated for every row of the OFF-DIAGONAL submat. 1791 One way to choose d_nz and o_nz is to use the max nonzerors per local 1792 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 1793 In this case, the values of d_nz,o_nz are: 1794 .vb 1795 proc0 : dnz = 2, o_nz = 2 1796 proc1 : dnz = 3, o_nz = 2 1797 proc2 : dnz = 1, o_nz = 4 1798 .ve 1799 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 1800 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 1801 for proc3. i.e we are using 12+15+10=37 storage locations to store 1802 34 values. 1803 1804 When d_nnz, o_nnz parameters are specified, the storage is specified 1805 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 1806 In the above case the values for d_nnz,o_nnz are: 1807 .vb 1808 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 1809 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 1810 proc2: d_nnz = [1,1] and o_nnz = [4,4] 1811 .ve 1812 Here the space allocated is sum of all the above values i.e 34, and 1813 hence pre-allocation is perfect. 1814 1815 .keywords: matrix, aij, compressed row, sparse, parallel 1816 1817 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1818 @*/ 1819 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1820 { 1821 Mat B; 1822 Mat_MPIAIJ *b; 1823 int ierr, i,sum[2],work[2],size,flag1 = 0, flag2 = 0; 1824 1825 PetscFunctionBegin; 1826 MPI_Comm_size(comm,&size); 1827 ierr = OptionsHasName(PETSC_NULL,"-mat_mpiaij",&flag1); CHKERRQ(ierr); 1828 ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2); CHKERRQ(ierr); 1829 if (!flag1 && !flag2 && size == 1) { 1830 if (M == PETSC_DECIDE) M = m; 1831 if (N == PETSC_DECIDE) N = n; 1832 ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 1833 PetscFunctionReturn(0); 1834 } 1835 1836 *A = 0; 1837 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",comm,MatDestroy,MatView); 1838 PLogObjectCreate(B); 1839 B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b); 1840 PetscMemzero(b,sizeof(Mat_MPIAIJ)); 1841 PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)); 1842 B->ops->destroy = MatDestroy_MPIAIJ; 1843 B->ops->view = MatView_MPIAIJ; 1844 B->factor = 0; 1845 B->assembled = PETSC_FALSE; 1846 B->mapping = 0; 1847 1848 B->insertmode = NOT_SET_VALUES; 1849 b->size = size; 1850 MPI_Comm_rank(comm,&b->rank); 1851 1852 if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 1853 SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1854 } 1855 1856 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1857 work[0] = m; work[1] = n; 1858 ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 1859 if (M == PETSC_DECIDE) M = sum[0]; 1860 if (N == PETSC_DECIDE) N = sum[1]; 1861 } 1862 if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);} 1863 if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);} 1864 b->m = m; B->m = m; 1865 b->n = n; B->n = n; 1866 b->N = N; B->N = N; 1867 b->M = M; B->M = M; 1868 1869 /* the information in the maps duplicates the information computed below, eventually 1870 we should remove the duplicate information that is not contained in the maps */ 1871 ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr); 1872 ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr); 1873 1874 /* build local table of row and column ownerships */ 1875 b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1876 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1877 b->cowners = b->rowners + b->size + 2; 1878 ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1879 b->rowners[0] = 0; 1880 for ( i=2; i<=b->size; i++ ) { 1881 b->rowners[i] += b->rowners[i-1]; 1882 } 1883 b->rstart = b->rowners[b->rank]; 1884 b->rend = b->rowners[b->rank+1]; 1885 ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1886 b->cowners[0] = 0; 1887 for ( i=2; i<=b->size; i++ ) { 1888 b->cowners[i] += b->cowners[i-1]; 1889 } 1890 b->cstart = b->cowners[b->rank]; 1891 b->cend = b->cowners[b->rank+1]; 1892 1893 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1894 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 1895 PLogObjectParent(B,b->A); 1896 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1897 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 1898 PLogObjectParent(B,b->B); 1899 1900 /* build cache for off array entries formed */ 1901 ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 1902 b->donotstash = 0; 1903 b->colmap = 0; 1904 b->garray = 0; 1905 b->roworiented = 1; 1906 1907 /* stuff used for matrix vector multiply */ 1908 b->lvec = 0; 1909 b->Mvctx = 0; 1910 1911 /* stuff for MatGetRow() */ 1912 b->rowindices = 0; 1913 b->rowvalues = 0; 1914 b->getrowactive = PETSC_FALSE; 1915 1916 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C", 1917 "MatStoreValues_MPIAIJ", 1918 (void*)MatStoreValues_MPIAIJ);CHKERRQ(ierr); 1919 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C", 1920 "MatRetrieveValues_MPIAIJ", 1921 (void*)MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 1922 ierr = PetscObjectComposeFunction((PetscObject)B,"PCSetUp_BJacobi_C", 1923 "PCSetUp_BJacobi_AIJ", 1924 (void*)PCSetUp_BJacobi_AIJ);CHKERRQ(ierr); 1925 *A = B; 1926 PetscFunctionReturn(0); 1927 } 1928 1929 #undef __FUNC__ 1930 #define __FUNC__ "MatDuplicate_MPIAIJ" 1931 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 1932 { 1933 Mat mat; 1934 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1935 int ierr, len=0, flg; 1936 1937 PetscFunctionBegin; 1938 *newmat = 0; 1939 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,"Mat",matin->comm,MatDestroy,MatView); 1940 PLogObjectCreate(mat); 1941 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1942 PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps)); 1943 mat->ops->destroy = MatDestroy_MPIAIJ; 1944 mat->ops->view = MatView_MPIAIJ; 1945 mat->factor = matin->factor; 1946 mat->assembled = PETSC_TRUE; 1947 1948 a->m = mat->m = oldmat->m; 1949 a->n = mat->n = oldmat->n; 1950 a->M = mat->M = oldmat->M; 1951 a->N = mat->N = oldmat->N; 1952 1953 a->rstart = oldmat->rstart; 1954 a->rend = oldmat->rend; 1955 a->cstart = oldmat->cstart; 1956 a->cend = oldmat->cend; 1957 a->size = oldmat->size; 1958 a->rank = oldmat->rank; 1959 mat->insertmode = NOT_SET_VALUES; 1960 a->rowvalues = 0; 1961 a->getrowactive = PETSC_FALSE; 1962 1963 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1964 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1965 a->cowners = a->rowners + a->size + 2; 1966 PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1967 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1968 if (oldmat->colmap) { 1969 a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1970 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1971 PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1972 } else a->colmap = 0; 1973 if (oldmat->garray) { 1974 len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 1975 a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray); 1976 PLogObjectMemory(mat,len*sizeof(int)); 1977 if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1978 } else a->garray = 0; 1979 1980 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1981 PLogObjectParent(mat,a->lvec); 1982 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1983 PLogObjectParent(mat,a->Mvctx); 1984 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A); CHKERRQ(ierr); 1985 PLogObjectParent(mat,a->A); 1986 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B); CHKERRQ(ierr); 1987 PLogObjectParent(mat,a->B); 1988 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 1989 if (flg) { 1990 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1991 } 1992 ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); 1993 *newmat = mat; 1994 PetscFunctionReturn(0); 1995 } 1996 1997 #include "sys.h" 1998 1999 #undef __FUNC__ 2000 #define __FUNC__ "MatLoad_MPIAIJ" 2001 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 2002 { 2003 Mat A; 2004 Scalar *vals,*svals; 2005 MPI_Comm comm = ((PetscObject)viewer)->comm; 2006 MPI_Status status; 2007 int i, nz, ierr, j,rstart, rend, fd; 2008 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 2009 int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 2010 int tag = ((PetscObject)viewer)->tag,cend,cstart,n; 2011 2012 PetscFunctionBegin; 2013 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 2014 if (!rank) { 2015 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2016 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 2017 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2018 if (header[3] < 0) { 2019 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ"); 2020 } 2021 } 2022 2023 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2024 M = header[1]; N = header[2]; 2025 /* determine ownership of all rows */ 2026 m = M/size + ((M % size) > rank); 2027 rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 2028 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2029 rowners[0] = 0; 2030 for ( i=2; i<=size; i++ ) { 2031 rowners[i] += rowners[i-1]; 2032 } 2033 rstart = rowners[rank]; 2034 rend = rowners[rank+1]; 2035 2036 /* distribute row lengths to all processors */ 2037 ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 2038 offlens = ourlens + (rend-rstart); 2039 if (!rank) { 2040 rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 2041 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 2042 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 2043 for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 2044 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 2045 PetscFree(sndcounts); 2046 } else { 2047 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr); 2048 } 2049 2050 if (!rank) { 2051 /* calculate the number of nonzeros on each processor */ 2052 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 2053 PetscMemzero(procsnz,size*sizeof(int)); 2054 for ( i=0; i<size; i++ ) { 2055 for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 2056 procsnz[i] += rowlengths[j]; 2057 } 2058 } 2059 PetscFree(rowlengths); 2060 2061 /* determine max buffer needed and allocate it */ 2062 maxnz = 0; 2063 for ( i=0; i<size; i++ ) { 2064 maxnz = PetscMax(maxnz,procsnz[i]); 2065 } 2066 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 2067 2068 /* read in my part of the matrix column indices */ 2069 nz = procsnz[0]; 2070 mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 2071 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr); 2072 2073 /* read in every one elses and ship off */ 2074 for ( i=1; i<size; i++ ) { 2075 nz = procsnz[i]; 2076 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 2077 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2078 } 2079 PetscFree(cols); 2080 } else { 2081 /* determine buffer space needed for message */ 2082 nz = 0; 2083 for ( i=0; i<m; i++ ) { 2084 nz += ourlens[i]; 2085 } 2086 mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 2087 2088 /* receive message of column indices*/ 2089 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2090 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2091 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2092 } 2093 2094 /* determine column ownership if matrix is not square */ 2095 if (N != M) { 2096 n = N/size + ((N % size) > rank); 2097 ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2098 cstart = cend - n; 2099 } else { 2100 cstart = rstart; 2101 cend = rend; 2102 n = cend - cstart; 2103 } 2104 2105 /* loop over local rows, determining number of off diagonal entries */ 2106 PetscMemzero(offlens,m*sizeof(int)); 2107 jj = 0; 2108 for ( i=0; i<m; i++ ) { 2109 for ( j=0; j<ourlens[i]; j++ ) { 2110 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2111 jj++; 2112 } 2113 } 2114 2115 /* create our matrix */ 2116 for ( i=0; i<m; i++ ) { 2117 ourlens[i] -= offlens[i]; 2118 } 2119 ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 2120 A = *newmat; 2121 ierr = MatSetOption(A,MAT_COLUMNS_SORTED); CHKERRQ(ierr); 2122 for ( i=0; i<m; i++ ) { 2123 ourlens[i] += offlens[i]; 2124 } 2125 2126 if (!rank) { 2127 vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 2128 2129 /* read in my part of the matrix numerical values */ 2130 nz = procsnz[0]; 2131 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2132 2133 /* insert into matrix */ 2134 jj = rstart; 2135 smycols = mycols; 2136 svals = vals; 2137 for ( i=0; i<m; i++ ) { 2138 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2139 smycols += ourlens[i]; 2140 svals += ourlens[i]; 2141 jj++; 2142 } 2143 2144 /* read in other processors and ship out */ 2145 for ( i=1; i<size; i++ ) { 2146 nz = procsnz[i]; 2147 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 2148 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2149 } 2150 PetscFree(procsnz); 2151 } else { 2152 /* receive numeric values */ 2153 vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 2154 2155 /* receive message of values*/ 2156 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2157 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2158 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2159 2160 /* insert into matrix */ 2161 jj = rstart; 2162 smycols = mycols; 2163 svals = vals; 2164 for ( i=0; i<m; i++ ) { 2165 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2166 smycols += ourlens[i]; 2167 svals += ourlens[i]; 2168 jj++; 2169 } 2170 } 2171 PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 2172 2173 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2174 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2175 PetscFunctionReturn(0); 2176 } 2177 2178 #undef __FUNC__ 2179 #define __FUNC__ "MatGetSubMatrix_MPIAIJ" 2180 /* 2181 Not great since it makes two copies of the submatrix, first an SeqAIJ 2182 in local and then by concatenating the local matrices the end result. 2183 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 2184 */ 2185 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatGetSubMatrixCall call,Mat *newmat) 2186 { 2187 int ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j; 2188 Mat *local,M, Mreuse; 2189 Scalar *vwork,*aa; 2190 MPI_Comm comm = mat->comm; 2191 Mat_SeqAIJ *aij; 2192 int *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend; 2193 2194 PetscFunctionBegin; 2195 MPI_Comm_rank(comm,&rank); 2196 MPI_Comm_size(comm,&size); 2197 2198 if (call == MAT_REUSE_MATRIX) { 2199 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 2200 if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse"); 2201 local = &Mreuse; 2202 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 2203 } else { 2204 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 2205 Mreuse = *local; 2206 PetscFree(local); 2207 } 2208 2209 /* 2210 m - number of local rows 2211 n - number of columns (same on all processors) 2212 rstart - first row in new global matrix generated 2213 */ 2214 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 2215 if (call == MAT_INITIAL_MATRIX) { 2216 aij = (Mat_SeqAIJ *) (Mreuse)->data; 2217 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 2218 ii = aij->i; 2219 jj = aij->j; 2220 2221 /* 2222 Determine the number of non-zeros in the diagonal and off-diagonal 2223 portions of the matrix in order to do correct preallocation 2224 */ 2225 2226 /* first get start and end of "diagonal" columns */ 2227 if (csize == PETSC_DECIDE) { 2228 nlocal = n/size + ((n % size) > rank); 2229 } else { 2230 nlocal = csize; 2231 } 2232 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2233 rstart = rend - nlocal; 2234 if (rank == size - 1 && rend != n) { 2235 SETERRQ(1,1,"Local column sizes do not add up to total number of columns"); 2236 } 2237 2238 /* next, compute all the lengths */ 2239 dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens); 2240 olens = dlens + m; 2241 for ( i=0; i<m; i++ ) { 2242 jend = ii[i+1] - ii[i]; 2243 olen = 0; 2244 dlen = 0; 2245 for ( j=0; j<jend; j++ ) { 2246 if ( *jj < rstart || *jj >= rend) olen++; 2247 else dlen++; 2248 jj++; 2249 } 2250 olens[i] = olen; 2251 dlens[i] = dlen; 2252 } 2253 ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 2254 PetscFree(dlens); 2255 } else { 2256 int ml,nl; 2257 2258 M = *newmat; 2259 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2260 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request"); 2261 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2262 /* 2263 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2264 rather than the slower MatSetValues(). 2265 */ 2266 M->was_assembled = PETSC_TRUE; 2267 M->assembled = PETSC_FALSE; 2268 } 2269 ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr); 2270 aij = (Mat_SeqAIJ *) (Mreuse)->data; 2271 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 2272 ii = aij->i; 2273 jj = aij->j; 2274 aa = aij->a; 2275 for (i=0; i<m; i++) { 2276 row = rstart + i; 2277 nz = ii[i+1] - ii[i]; 2278 cwork = jj; jj += nz; 2279 vwork = aa; aa += nz; 2280 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr); 2281 } 2282 2283 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2284 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2285 *newmat = M; 2286 2287 /* save submatrix used in processor for next request */ 2288 if (call == MAT_INITIAL_MATRIX) { 2289 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2290 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2291 } 2292 2293 PetscFunctionReturn(0); 2294 } 2295