#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpiaij.c,v 1.241 1998/05/06 15:28:09 bsmith Exp bsmith $";
#endif

#include "pinclude/pviewer.h"
#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

/* local utility routine that creates a mapping from the global column 
   number to the local number in the off-diagonal part of the local 
   storage of the matrix.  This is done in a non-scalable way, since the 
   length of colmap equals the global number of columns.
*/
#undef __FUNC__  
#define __FUNC__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i;

  PetscFunctionBegin;
  aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscMemzero(aij->colmap,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
  PetscFunctionReturn(0);
}

extern int DisAssemble_MPIAIJ(Mat);

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row];  \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for ( _i=0; _i<nrow; _i++ ) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value;   \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    }  \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len     = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
      new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
      new_j   = (int *) (new_a + new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
      for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
      len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                                                           len*sizeof(int)); \
      PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
      PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                                                           len*sizeof(Scalar)); \
      /* free up old matrix storage */ \
 \
      PetscFree(a->a);  \
      if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
      a->singlemalloc = 1; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1;  \
    ap[_i] = value;  \
    a_noinsert: ; \
    ailen[row] = nrow; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row];  \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for ( _i=0; _i<nrow; _i++ ) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value;   \
        else                    ap[_i] = value; \
        goto b_noinsert; \
      } \
    }  \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len     = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
      new_a   = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
      new_j   = (int *) (new_a + new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
      for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
      len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
      PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                                                           len*sizeof(int)); \
      PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
      PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                                                           len*sizeof(Scalar)); \
      /* free up old matrix storage */ \
 \
      PetscFree(b->a);  \
      if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
      b->singlemalloc = 1; \
 \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1;  \
    ap[_i] = value;  \
    b_noinsert: ; \
    bilen[row] = nrow; \
}

extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
#undef __FUNC__  
#define __FUNC__ "MatSetValues_MPIAIJ"
int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Scalar     value;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
  int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
  Scalar     *aa = a->a;

  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
  int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
  Scalar     *ba = b->a;

  int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
  int        nonew = a->nonew,shift = a->indexshift;
  Scalar     *ap;

  PetscFunctionBegin;
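  /*
     Rows owned by this processor are inserted directly: a column in [cstart,cend)
     goes into the "diagonal" block aij->A, any other column goes into the
     "off-diagonal" block aij->B (translated through colmap once the matrix has
     been assembled).  Rows owned by other processors are stashed here and
     communicated during MatAssemblyBegin/End().
  */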
  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
#if defined(USE_PETSC_BOPT_g)
        else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
        else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
            col = aij->colmap[in[j]] - 1;
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ *) B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    }
    else {
      if (roworiented && !aij->donotstash) {
        ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      }
      else {
        if (!aij->donotstash) {
          row = im[i];
          for ( j=0; j<n; j++ ) {
            ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatGetValues_MPIAIJ"
int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        }
        else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
          col = aij->colmap[idxn[j]] - 1;
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    }
    else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = aij->size, *owners = aij->rowners;
  int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  PetscFunctionBegin;
  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nreceives = work[rank];
  ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message consists of (global row index, global column index, value)
          triples; the indices are stored as Scalars to simplify the message passing.
       2) since we don't know how long each individual message is we
          allocate the largest needed buffer for each receive. Potentially
          this is a lot of wasted space.

       This could be done better.
  */
  rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
                     comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  PetscFunctionReturn(0);
}
extern int MatSetUpMultiply_MPIAIJ(Mat);

#undef __FUNC__  
#define __FUNC__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
  int         row,col,other_disassembled;
  Scalar      *values,val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (count) {
    ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
    n = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - aij->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
      } else {
        if (mat->was_assembled || mat->assembled) {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr);
          }
          col = aij->colmap[col] - 1;
          if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col = (int) PetscReal(values[3*i+1]);
          }
        }
        ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
      }
    }
    count--;
  }
  PetscFree(aij->recv_waits); PetscFree(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  PetscFree(aij->send_waits); PetscFree(aij->svalues);

  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
  if (mat->was_assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatZeroEntries_MPIAIJ"
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatZeroRows_MPIAIJ"
int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,rstart=l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     localdiag;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/
  for ( i=0; i<N; i++ ) {
    idx = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nrecvs = work[rank];
  ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax = work[rank];
  PetscFree(work);

  /* post receives:   */
  rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /* wait on receives */
  lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory, cause lots of mallocs and slow things down.

     Contributed by: Mathew Knepley
  */
  localdiag = PETSC_FALSE;
  if (diag && (l->A->M == l->A->N)) {
    localdiag = PETSC_TRUE;
    ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
  }
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  if (diag && (localdiag == PETSC_FALSE)) {
    for ( i = 0; i < slen; i++ ) {
      row = lrows[i] + rstart;
      MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);
    }
    MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
  }
  PetscFree(lrows);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatMult_MPIAIJ"
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr);
  if (nt != a->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatMultAdd_MPIAIJ"
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatMultTrans_MPIAIJ"
int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in yy until the next line, which is true for the current
     VecScatter implementation but may not always hold.
  */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatMultTransAdd_MPIAIJ"
int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in zz until the next line, which is true for the current
     VecScatter implementation but may not always hold. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNC__  
#define __FUNC__ "MatGetDiagonal_MPIAIJ"
int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  PetscFunctionBegin;
  if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatScale_MPIAIJ"
int MatScale_MPIAIJ(Scalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A); CHKERRQ(ierr);
  ierr = MatScale(aa,a->B); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatDestroy_MPIAIJ"
int MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  PetscFunctionBegin;
#if defined(USE_PETSC_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
  PetscFree(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
  if (aij->colmap) PetscFree(aij->colmap);
  if (aij->garray) PetscFree(aij->garray);
  if (aij->lvec)   VecDestroy(aij->lvec);
  if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
  if (aij->rowvalues) PetscFree(aij->rowvalues);
  PetscFree(aij);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatView_MPIAIJ_Binary"
extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
  }
  else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab"
extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ  *C = (Mat_SeqAIJ*)aij->A->data;
  int         ierr, format,shift = C->indexshift,rank;
  FILE        *fd;
  ViewerType  vtype;

  PetscFunctionBegin;
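  /*
     ASCII viewers with the INFO_LONG format print per-processor statistics and
     return (the plain INFO format returns without printing anything here).
     Otherwise, depending on the viewer, either each processor prints its own
     diagonal and off-diagonal blocks in sequence, or the whole matrix is first
     assembled onto the first processor and viewed there (for the draw viewer
     every processor participates in that final MatView() call).
  */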
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerGetFormat(viewer,&format);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      int     flg;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);
      ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
      PetscSequentialPhaseBegin(mat->comm,1);
      if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
                       rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
                   rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
      fflush(fd);
      PetscSequentialPhaseEnd(mat->comm,1);
      ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (vtype == DRAW_VIEWER) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
            aij->cend);
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
  } else {
    int size = aij->size;
    rank = aij->rank;
    if (size == 1) {
      ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    } else {
      /* assemble the entire matrix onto first processor. */
      Mat         A;
      Mat_SeqAIJ  *Aloc;
      int         M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar      *a;

      if (!rank) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
      } else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* copy over the A part */
      Aloc = (Mat_SeqAIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      aj = Aloc->j;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

      /* copy over the B part */
      Aloc = (Mat_SeqAIJ*) aij->B->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PetscFree(ct);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      /*
         Everyone has to call to draw the matrix since the graphics waits are
         synchronized across all processors that share the Draw object
      */
      if (!rank || vtype == DRAW_VIEWER) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatView_MPIAIJ"
int MatView_MPIAIJ(Mat mat,Viewer viewer)
{
  int        ierr;
  ViewerType vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
      vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  } else if (vtype == BINARY_FILE_VIEWER) {
    ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ(1,1,"Viewer type not supported by PETSc object");
  }
  PetscFunctionReturn(0);
}

/*
    This has to provide several versions.

     2) a) use only local smoothing, updating outer values only once;
        b) local smoothing, updating outer values each inner iteration;
     3) color updating outer values between colors.
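    The code below currently implements only the SOR_LOCAL_FORWARD_SWEEP,
    SOR_LOCAL_BACKWARD_SWEEP and SOR_LOCAL_SYMMETRIC_SWEEP variants: each
    processor sweeps over its own rows while the off-processor part of x is
    held fixed in the scattered work vector lvec.  Any other (truly parallel)
    SOR request generates a "not supported" error.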
*/
#undef __FUNC__  
#define __FUNC__ "MatRelax_MPIAIJ"
int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                    double fshift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
  Scalar     *b,*x,*xs,*ls,d,*v,sum;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i,shift = A->indexshift;

  PetscFunctionBegin;
  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x + shift; /* shift by one for index start of 1 */
  ls = ls + shift;
  if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);}
  diag = A->diag;
  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
                           mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
                         mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatGetInfo_MPIAIJ"
int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        A = mat->A, B = mat->B;
  int        ierr;
  double     isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)mat->M;
  info->columns_global    = (double)mat->N;
  info->rows_local        = (double)mat->m;
  info->columns_local     = (double)mat->N;

  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatSetOption_MPIAIJ"
int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  PetscFunctionBegin;
  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_COLUMNS_UNSORTED ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
      op == MAT_NEW_NONZERO_LOCATION_ERROR) {
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS)
    PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
  else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
    a->donotstash = 1;
  } else if (op == MAT_NO_NEW_DIAGONALS){
    SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatGetSize_MPIAIJ"
int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->M;
  if (n) *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatGetLocalSize_MPIAIJ"
int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->m;
  if (n) *n = mat->n;
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;

  PetscFunctionBegin;
  *m = mat->rstart; *n = mat->rend;
  PetscFunctionReturn(0);
}

extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);

#undef __FUNC__  
#define __FUNC__ "MatGetRow_MPIAIJ"
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
  int        *cmap, *idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
    int        max = 1,m = mat->m,tmp;
    for ( i=0; i<m; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
    CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    }
    else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatRestoreRow_MPIAIJ"
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatNorm_MPIAIJ"
int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2);
      PetscMemzero(tmp,aij->N*sizeof(double));
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
      }
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    } else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatTranspose_MPIAIJ"
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat        B;
  Scalar     *array;

  PetscFunctionBegin;
  if (matout == PETSC_NULL && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
  }

  ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PetscFree(ct);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    PetscOps       *Abops;
    struct _MatOps *Aops;

    /* This isn't really an in-place transpose, but we do free the data structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
    if (a->colmap) PetscFree(a->colmap);
    if (a->garray) PetscFree(a->garray);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);

    /*
       This is horrible, horrible code. We need to keep the
       A pointers for the bops and ops but copy everything
       else from C.
    */
    Abops = A->bops;
    Aops  = A->ops;
    PetscMemcpy(A,B,sizeof(struct _p_Mat));
    A->bops = Abops;
    A->ops  = Aops;
    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatDiagonalScale_MPIAIJ"
int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat        a = aij->A, b = aij->B;
  int        ierr,s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0); CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr); CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}


extern int MatPrintHelp_SeqAIJ(Mat);
#undef __FUNC__  
#define __FUNC__ "MatPrintHelp_MPIAIJ"
int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
  int        ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatGetBlockSize_MPIAIJ"
int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatSetUnfactored_MPIAIJ"
int MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__  
#define __FUNC__ "MatEqual_MPIAIJ"
int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
  Mat        a, b, c, d;
  PetscTruth flg;
  int        ierr;

  PetscFunctionBegin;
  if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a, c, &flg); CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b, d, &flg); CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatGetSubMatrixCall,Mat *);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       0,0,
       0,0,
       0,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       0,0,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       0,0,
       0,0,MatConvertSameType_MPIAIJ,0,0,
       0,0,0,
       MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,0,0,0,
       MatGetBlockSize_MPIAIJ,0,0,0,0,
       MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ,
       0,0,MatGetSubMatrix_MPIAIJ};


#undef __FUNC__  
#define __FUNC__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.  For square matrices
           n is almost always m.
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
.  d_nz - number of nonzeros per row in the diagonal portion of the local submatrix
           (same for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           diagonal portion of the local submatrix (possibly different for each row)
           or PETSC_NULL.  You must leave room for the diagonal entry even if
           it is zero.
.  o_nz - number of nonzeros per row in the off-diagonal portion of the local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_aij_no_inode - Do not use inodes
.  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  |  o o o d d d o o o o o o
   row 4  |  o o o d d d o o o o o o
   row 5  |  o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQAIJ format for compressed row storage.

   Now d_nz should indicate the number of nonzeros per row in the d matrix,
   and o_nz should indicate the number of nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
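
   Example:
   A minimal calling sequence is sketched below (illustrative only; the
   preallocation values 5 and 2 and the global size M = N = 100 are
   placeholders, not recommendations).  It creates a matrix, inserts one
   value per locally owned row and assembles it.

.vb
      Mat    A;
      int    ierr,i,rstart,rend,M = 100,N = 100;
      Scalar one = 1.0;

      ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,
                             5,PETSC_NULL,2,PETSC_NULL,&A); CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&rstart,&rend); CHKERRQ(ierr);
      for ( i=rstart; i<rend; i++ ) {
        ierr = MatSetValues(A,1,&i,1,&i,&one,INSERT_VALUES); CHKERRQ(ierr);
      }
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
.ve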

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat          B;
  Mat_MPIAIJ   *b;
  int          ierr, i,sum[2],work[2],size;

  PetscFunctionBegin;
  MPI_Comm_size(comm,&size);
  if (size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data         = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIAIJ));
  PetscMemcpy(B->ops,&MatOps,sizeof(struct _MatOps));
  B->ops->destroy = MatDestroy_MPIAIJ;
  B->ops->view    = MatView_MPIAIJ;
  B->factor       = 0;
  B->assembled    = PETSC_FALSE;
  B->mapping      = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  MPI_Comm_rank(comm,&b->rank);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
  if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  *A = B;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatConvertSameType_MPIAIJ"
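/*
    Presumably this is the "convert to the same type" path reached through the
    generic MatConvert(); it duplicates the parallel layout of matin and copies
    the diagonal and off-diagonal blocks with MatConvert(...,MATSAME,...).
    Note that the cpvalues argument is not consulted in the current
    implementation.
*/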
int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len=0, flg;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,matin->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->ops->destroy = MatDestroy_MPIAIJ;
  mat->ops->view    = MatView_MPIAIJ;
  mat->factor       = matin->factor;
  mat->assembled    = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 2;
  PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
  } else a->colmap = 0;
  if (oldmat->garray) {
    len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
    a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIAIJ"
int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat        A;
  Scalar     *vals,*svals;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        i, nz, ierr, j,rstart, rend, fd;
  int        header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int        *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int        tag = ((PetscObject)viewer)->tag;

  PetscFunctionBegin;
  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
    }
  }
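
  /*
     A note on the binary layout read here (a summary inferred from the code
     below, not a normative description): header[] holds the matrix cookie,
     the global dimensions M and N, and (presumably) the total nonzero count;
     it is followed by the M row lengths, then all column indices, then all
     numerical values.  Rows are dealt out almost evenly: for example, with
     M = 10 rows and 3 processes the local sizes are 4, 3 and 3, so that
     rowners[] = {0,4,7,10} and process 1 owns rows [4,7).
  */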
  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  rowners    = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    PetscFree(sndcounts);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);

    /* read in everyone else's and ship off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    PetscFree(cols);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with the file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,MAT_COLUMNS_SORTED);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }
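
  /*
     At this point ourlens[] once again holds the full row lengths; it was
     temporarily reduced to the diagonal-block counts (ourlens - offlens) only
     so that MatCreateMPIAIJ() above could preallocate d_nnz and o_nnz exactly.
     As an illustration (the numbers are made up): if this process owns rows
     [3,6) of a 12-column matrix and one of its rows has columns {0,3,4,9},
     that row contributes 2 to the diagonal count (columns 3,4) and 2 to the
     off-diagonal count (columns 0,9).
  */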
  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' values and ship out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    PetscFree(procsnz);
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with the file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSubMatrix_MPIAIJ"
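/*
    Illustrative usage sketch (assuming the user-level MatGetSubMatrix() takes
    the same arguments as this routine; the index sets below are placeholders):

       IS  isrow, iscol;
       Mat sub;
       ierr = ISCreateStride(comm,nrowloc,firstrow,1,&isrow); CHKERRQ(ierr);
       ierr = ISCreateStride(comm,ncol,0,1,&iscol); CHKERRQ(ierr);
       ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub); CHKERRQ(ierr);
       ...
       ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&sub); CHKERRQ(ierr);

    The MAT_REUSE_MATRIX call relies on the "SubMatrix" object composed onto
    the result at the end of this routine.
*/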
/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ copy
    of the requested rows/columns on each process, then the final result built
    by concatenating those local matrices.  Writing it directly would be much
    like MatGetSubMatrices_MPIAIJ().
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatGetSubMatrixCall call,Mat *newmat)
{
  int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  Mat        *local,M, Mreuse;
  Scalar     *vwork,*aa;
  MPI_Comm   comm = mat->comm;
  Mat_SeqAIJ *aij;
  int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;

  PetscFunctionBegin;
  MPI_Comm_rank(comm,&rank);
  MPI_Comm_size(comm,&size);

  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    PetscFree(local);
  }

  /*
      m      - number of local rows
      n      - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *) (Mreuse)->data;
    if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      nlocal = n/size + ((n % size) > rank);
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
    }

    /* next, compute all the lengths */
    dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
    olens = dlens + m;
    for ( i=0; i<m; i++ ) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for ( j=0; j<jend; j++ ) {
        if ( *jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
    PetscFree(dlens);
  } else {
    int ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
        rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr);
  aij = (Mat_SeqAIJ *) (Mreuse)->data;
  if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}