1 2 #ifdef PETSC_RCS_HEADER 3 static char vcid[] = "$Id: mpiaij.c,v 1.240 1998/04/27 03:54:07 curfman Exp bsmith $"; 4 #endif 5 6 #include "pinclude/pviewer.h" 7 #include "src/mat/impls/aij/mpi/mpiaij.h" 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 11 /* local utility routine that creates a mapping from the global column 12 number to the local number in the off-diagonal part of the local 13 storage of the matrix. This is done in a non scable way since the 14 length of colmap equals the global matrix length. 15 */ 16 #undef __FUNC__ 17 #define __FUNC__ "CreateColmap_MPIAIJ_Private" 18 int CreateColmap_MPIAIJ_Private(Mat mat) 19 { 20 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 21 Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 22 int n = B->n,i; 23 24 PetscFunctionBegin; 25 aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap); 26 PLogObjectMemory(mat,aij->N*sizeof(int)); 27 PetscMemzero(aij->colmap,aij->N*sizeof(int)); 28 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 29 PetscFunctionReturn(0); 30 } 31 32 extern int DisAssemble_MPIAIJ(Mat); 33 34 #define CHUNKSIZE 15 35 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 36 { \ 37 \ 38 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 39 rmax = aimax[row]; nrow = ailen[row]; \ 40 col1 = col - shift; \ 41 \ 42 low = 0; high = nrow; \ 43 while (high-low > 5) { \ 44 t = (low+high)/2; \ 45 if (rp[t] > col) high = t; \ 46 else low = t; \ 47 } \ 48 for ( _i=0; _i<nrow; _i++ ) { \ 49 if (rp[_i] > col1) break; \ 50 if (rp[_i] == col1) { \ 51 if (addv == ADD_VALUES) ap[_i] += value; \ 52 else ap[_i] = value; \ 53 goto a_noinsert; \ 54 } \ 55 } \ 56 if (nonew == 1) goto a_noinsert; \ 57 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 58 if (nrow >= rmax) { \ 59 /* there is no extra room in row, therefore enlarge */ \ 60 int new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \ 61 Scalar *new_a; \ 62 \ 63 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 64 \ 65 /* malloc new storage space */ \ 66 len = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \ 67 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 68 new_j = (int *) (new_a + new_nz); \ 69 new_i = new_j + new_nz; \ 70 \ 71 /* copy over old data into new slots */ \ 72 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \ 73 for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 74 PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \ 75 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 76 PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 77 len*sizeof(int)); \ 78 PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \ 79 PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 80 len*sizeof(Scalar)); \ 81 /* free up old matrix storage */ \ 82 \ 83 PetscFree(a->a); \ 84 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 85 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 86 a->singlemalloc = 1; \ 87 \ 88 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 89 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 90 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 91 a->maxnz += CHUNKSIZE; \ 92 a->reallocs++; \ 93 } \ 94 N = nrow++ - 1; a->nz++; \ 95 /* shift up all the later entries in this row */ \ 96 for ( ii=N; ii>=_i; ii-- ) { \ 97 rp[ii+1] = rp[ii]; \ 98 ap[ii+1] = ap[ii]; \ 99 } \ 100 rp[_i] = col1; \ 101 ap[_i] = value; \ 102 a_noinsert: ; \ 103 ailen[row] = nrow; \ 104 } 105 106 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 107 { \ 108 \ 109 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 110 rmax = bimax[row]; nrow = bilen[row]; \ 111 col1 = col - shift; \ 112 \ 113 low = 0; high = nrow; \ 114 while (high-low > 5) { \ 115 t = (low+high)/2; \ 116 if (rp[t] > col) high = t; \ 117 else low = t; \ 118 } \ 119 for ( _i=0; _i<nrow; _i++ ) { \ 120 if (rp[_i] > col1) break; \ 121 if (rp[_i] == col1) { \ 122 if (addv == ADD_VALUES) ap[_i] += value; \ 123 else ap[_i] = value; \ 124 goto b_noinsert; \ 125 } \ 126 } \ 127 if (nonew == 1) goto b_noinsert; \ 128 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 129 if (nrow >= rmax) { \ 130 /* there is no extra room in row, therefore enlarge */ \ 131 int new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \ 132 Scalar *new_a; \ 133 \ 134 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 135 \ 136 /* malloc new storage space */ \ 137 len = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \ 138 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 139 new_j = (int *) (new_a + new_nz); \ 140 new_i = new_j + new_nz; \ 141 \ 142 /* copy over old data into new slots */ \ 143 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \ 144 for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 145 PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \ 146 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 147 PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 148 len*sizeof(int)); \ 149 PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \ 150 PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 151 len*sizeof(Scalar)); \ 152 /* free up old matrix storage */ \ 153 \ 154 PetscFree(b->a); \ 155 if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 156 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 157 b->singlemalloc = 1; \ 158 \ 159 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 160 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 161 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 162 b->maxnz += CHUNKSIZE; \ 163 b->reallocs++; \ 164 } \ 165 N = nrow++ - 1; b->nz++; \ 166 /* shift up all the later entries in this row */ \ 167 for ( ii=N; ii>=_i; ii-- ) { \ 168 rp[ii+1] = rp[ii]; \ 169 ap[ii+1] = ap[ii]; \ 170 } \ 171 rp[_i] = col1; \ 172 ap[_i] = value; \ 173 b_noinsert: ; \ 174 bilen[row] = nrow; \ 175 } 176 177 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 178 #undef __FUNC__ 179 #define __FUNC__ "MatSetValues_MPIAIJ" 180 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 181 { 182 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 183 Scalar value; 184 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 185 int cstart = aij->cstart, cend = aij->cend,row,col; 186 int roworiented = aij->roworiented; 187 188 /* Some Variables required in the macro */ 189 Mat A = aij->A; 190 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 191 int *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j; 192 Scalar *aa = a->a; 193 194 Mat B = aij->B; 195 Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data; 196 int *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j; 197 Scalar *ba = b->a; 198 199 int *rp,ii,nrow,_i,rmax, N, col1,low,high,t; 200 int nonew = a->nonew,shift = a->indexshift; 201 Scalar *ap; 202 203 PetscFunctionBegin; 204 for ( i=0; i<m; i++ ) { 205 #if defined(USE_PETSC_BOPT_g) 206 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 207 if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 208 #endif 209 if (im[i] >= rstart && im[i] < rend) { 210 row = im[i] - rstart; 211 for ( j=0; j<n; j++ ) { 212 if (in[j] >= cstart && in[j] < cend){ 213 col = in[j] - cstart; 214 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 215 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 216 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 217 } 218 #if defined(USE_PETSC_BOPT_g) 219 else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 220 else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");} 221 #endif 222 else { 223 if (mat->was_assembled) { 224 if (!aij->colmap) { 225 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 226 } 227 col = aij->colmap[in[j]] - 1; 228 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 229 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 230 col = in[j]; 231 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 232 B = aij->B; 233 b = (Mat_SeqAIJ *) B->data; 234 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 235 ba = b->a; 236 } 237 } else col = in[j]; 238 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 239 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 240 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 241 } 242 } 243 } 244 else { 245 if (roworiented && !aij->donotstash) { 246 ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 247 } 248 else { 249 if (!aij->donotstash) { 250 row = im[i]; 251 for ( j=0; j<n; j++ ) { 252 ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 253 } 254 } 255 } 256 } 257 } 258 PetscFunctionReturn(0); 259 } 260 261 #undef __FUNC__ 262 #define __FUNC__ "MatGetValues_MPIAIJ" 263 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 264 { 265 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 266 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 267 int cstart = aij->cstart, cend = aij->cend,row,col; 268 269 PetscFunctionBegin; 270 for ( i=0; i<m; i++ ) { 271 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 272 if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 273 if (idxm[i] >= rstart && idxm[i] < rend) { 274 row = idxm[i] - rstart; 275 for ( j=0; j<n; j++ ) { 276 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 277 if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 278 if (idxn[j] >= cstart && idxn[j] < cend){ 279 col = idxn[j] - cstart; 280 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 281 } 282 else { 283 if (!aij->colmap) { 284 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 285 } 286 col = aij->colmap[idxn[j]] - 1; 287 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 288 else { 289 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 290 } 291 } 292 } 293 } else { 294 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 295 } 296 } 297 PetscFunctionReturn(0); 298 } 299 300 #undef __FUNC__ 301 #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 302 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 303 { 304 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 305 MPI_Comm comm = mat->comm; 306 int size = aij->size, *owners = aij->rowners; 307 int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 308 MPI_Request *send_waits,*recv_waits; 309 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 310 InsertMode addv; 311 Scalar *rvalues,*svalues; 312 313 PetscFunctionBegin; 314 /* make sure all processors are either in INSERTMODE or ADDMODE */ 315 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 316 if (addv == (ADD_VALUES|INSERT_VALUES)) { 317 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 318 } 319 mat->insertmode = addv; /* in case this processor had no cache */ 320 321 /* first count number of contributors to each processor */ 322 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 323 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 324 owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 325 for ( i=0; i<aij->stash.n; i++ ) { 326 idx = aij->stash.idx[i]; 327 for ( j=0; j<size; j++ ) { 328 if (idx >= owners[j] && idx < owners[j+1]) { 329 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 330 } 331 } 332 } 333 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 334 335 /* inform other processors of number of messages and max length*/ 336 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 337 ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 338 nreceives = work[rank]; 339 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 340 nmax = work[rank]; 341 PetscFree(work); 342 343 /* post receives: 344 1) each message will consist of ordered pairs 345 (global index,value) we store the global index as a double 346 to simplify the message passing. 347 2) since we don't know how long each individual message is we 348 allocate the largest needed buffer for each receive. Potentially 349 this is a lot of wasted space. 350 351 352 This could be done better. 353 */ 354 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues); 355 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 356 for ( i=0; i<nreceives; i++ ) { 357 ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 358 comm,recv_waits+i);CHKERRQ(ierr); 359 } 360 361 /* do sends: 362 1) starts[i] gives the starting index in svalues for stuff going to 363 the ith processor 364 */ 365 svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 366 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 367 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 368 starts[0] = 0; 369 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 370 for ( i=0; i<aij->stash.n; i++ ) { 371 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 372 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 373 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 374 } 375 PetscFree(owner); 376 starts[0] = 0; 377 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 378 count = 0; 379 for ( i=0; i<size; i++ ) { 380 if (procs[i]) { 381 ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 382 } 383 } 384 PetscFree(starts); PetscFree(nprocs); 385 386 /* Free cache space */ 387 PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n); 388 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 389 390 aij->svalues = svalues; aij->rvalues = rvalues; 391 aij->nsends = nsends; aij->nrecvs = nreceives; 392 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 393 aij->rmax = nmax; 394 395 PetscFunctionReturn(0); 396 } 397 extern int MatSetUpMultiply_MPIAIJ(Mat); 398 399 #undef __FUNC__ 400 #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 401 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 402 { 403 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 404 MPI_Status *send_status,recv_status; 405 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 406 int row,col,other_disassembled; 407 Scalar *values,val; 408 InsertMode addv = mat->insertmode; 409 410 PetscFunctionBegin; 411 /* wait on receives */ 412 while (count) { 413 ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 414 /* unpack receives into our local space */ 415 values = aij->rvalues + 3*imdex*aij->rmax; 416 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr); 417 n = n/3; 418 for ( i=0; i<n; i++ ) { 419 row = (int) PetscReal(values[3*i]) - aij->rstart; 420 col = (int) PetscReal(values[3*i+1]); 421 val = values[3*i+2]; 422 if (col >= aij->cstart && col < aij->cend) { 423 col -= aij->cstart; 424 ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 425 } else { 426 if (mat->was_assembled || mat->assembled) { 427 if (!aij->colmap) { 428 ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr); 429 } 430 col = aij->colmap[col] - 1; 431 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 432 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 433 col = (int) PetscReal(values[3*i+1]); 434 } 435 } 436 ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 437 } 438 } 439 count--; 440 } 441 PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 442 443 /* wait on sends */ 444 if (aij->nsends) { 445 send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 446 ierr = MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr); 447 PetscFree(send_status); 448 } 449 PetscFree(aij->send_waits); PetscFree(aij->svalues); 450 451 ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 452 ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 453 454 /* determine if any processor has disassembled, if so we must 455 also disassemble ourselfs, in order that we may reassemble. */ 456 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 457 if (mat->was_assembled && !other_disassembled) { 458 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 459 } 460 461 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 462 ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 463 } 464 ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 465 ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 466 467 if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 468 PetscFunctionReturn(0); 469 } 470 471 #undef __FUNC__ 472 #define __FUNC__ "MatZeroEntries_MPIAIJ" 473 int MatZeroEntries_MPIAIJ(Mat A) 474 { 475 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 476 int ierr; 477 478 PetscFunctionBegin; 479 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 480 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 481 PetscFunctionReturn(0); 482 } 483 484 #undef __FUNC__ 485 #define __FUNC__ "MatZeroRows_MPIAIJ" 486 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 487 { 488 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 489 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 490 int *procs,*nprocs,j,found,idx,nsends,*work,row; 491 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 492 int *rvalues,tag = A->tag,count,base,slen,n,*source; 493 int *lens,imdex,*lrows,*values,rstart=l->rstart; 494 MPI_Comm comm = A->comm; 495 MPI_Request *send_waits,*recv_waits; 496 MPI_Status recv_status,*send_status; 497 IS istmp; 498 PetscTruth localdiag; 499 500 PetscFunctionBegin; 501 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 502 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 503 504 /* first count number of contributors to each processor */ 505 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 506 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 507 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 508 for ( i=0; i<N; i++ ) { 509 idx = rows[i]; 510 found = 0; 511 for ( j=0; j<size; j++ ) { 512 if (idx >= owners[j] && idx < owners[j+1]) { 513 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 514 } 515 } 516 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 517 } 518 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 519 520 /* inform other processors of number of messages and max length*/ 521 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 522 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 523 nrecvs = work[rank]; 524 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 525 nmax = work[rank]; 526 PetscFree(work); 527 528 /* post receives: */ 529 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 530 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 531 for ( i=0; i<nrecvs; i++ ) { 532 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 533 } 534 535 /* do sends: 536 1) starts[i] gives the starting index in svalues for stuff going to 537 the ith processor 538 */ 539 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 540 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 541 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 542 starts[0] = 0; 543 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 544 for ( i=0; i<N; i++ ) { 545 svalues[starts[owner[i]]++] = rows[i]; 546 } 547 ISRestoreIndices(is,&rows); 548 549 starts[0] = 0; 550 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 551 count = 0; 552 for ( i=0; i<size; i++ ) { 553 if (procs[i]) { 554 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 555 } 556 } 557 PetscFree(starts); 558 559 base = owners[rank]; 560 561 /* wait on receives */ 562 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 563 source = lens + nrecvs; 564 count = nrecvs; slen = 0; 565 while (count) { 566 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 567 /* unpack receives into our local space */ 568 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 569 source[imdex] = recv_status.MPI_SOURCE; 570 lens[imdex] = n; 571 slen += n; 572 count--; 573 } 574 PetscFree(recv_waits); 575 576 /* move the data into the send scatter */ 577 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 578 count = 0; 579 for ( i=0; i<nrecvs; i++ ) { 580 values = rvalues + i*nmax; 581 for ( j=0; j<lens[i]; j++ ) { 582 lrows[count++] = values[j] - base; 583 } 584 } 585 PetscFree(rvalues); PetscFree(lens); 586 PetscFree(owner); PetscFree(nprocs); 587 588 /* actually zap the local rows */ 589 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 590 PLogObjectParent(A,istmp); 591 592 /* 593 Zero the required rows. If the "diagonal block" of the matrix 594 is square and the user wishes to set the diagonal we use seperate 595 code so that MatSetValues() is not called for each diagonal allocating 596 new memory, thus calling lots of mallocs and slowing things down. 597 598 Contributed by: Mathew Knepley 599 */ 600 localdiag = PETSC_FALSE; 601 if (diag && (l->A->M == l->A->N)) { 602 localdiag = PETSC_TRUE; 603 ierr = MatZeroRows(l->A,istmp,diag); CHKERRQ(ierr); 604 } else { 605 ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 606 } 607 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 608 ierr = ISDestroy(istmp); CHKERRQ(ierr); 609 610 if (diag && (localdiag == PETSC_FALSE)) { 611 for ( i = 0; i < slen; i++ ) { 612 row = lrows[i] + rstart; 613 MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); 614 } 615 MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); 616 MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); 617 } 618 619 ierr = ISDestroy(istmp); CHKERRQ(ierr); 620 621 if (diag) { 622 for ( i = 0; i < slen; i++ ) { 623 row = lrows[i] + rstart; 624 MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); 625 } 626 MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); 627 MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); 628 } 629 PetscFree(lrows); 630 /* wait on sends */ 631 if (nsends) { 632 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 633 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 634 PetscFree(send_status); 635 } 636 PetscFree(send_waits); PetscFree(svalues); 637 638 PetscFunctionReturn(0); 639 } 640 641 #undef __FUNC__ 642 #define __FUNC__ "MatMult_MPIAIJ" 643 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 644 { 645 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 646 int ierr,nt; 647 648 PetscFunctionBegin; 649 ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 650 if (nt != a->n) { 651 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 652 } 653 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 654 ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr); 655 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 656 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 657 PetscFunctionReturn(0); 658 } 659 660 #undef __FUNC__ 661 #define __FUNC__ "MatMultAdd_MPIAIJ" 662 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 663 { 664 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 665 int ierr; 666 667 PetscFunctionBegin; 668 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 669 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 670 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 671 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 672 PetscFunctionReturn(0); 673 } 674 675 #undef __FUNC__ 676 #define __FUNC__ "MatMultTrans_MPIAIJ" 677 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 678 { 679 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 680 int ierr; 681 682 PetscFunctionBegin; 683 /* do nondiagonal part */ 684 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 685 /* send it on its way */ 686 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 687 /* do local part */ 688 ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr); 689 /* receive remote parts: note this assumes the values are not actually */ 690 /* inserted in yy until the next line, which is true for my implementation*/ 691 /* but is not perhaps always true. */ 692 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 693 PetscFunctionReturn(0); 694 } 695 696 #undef __FUNC__ 697 #define __FUNC__ "MatMultTransAdd_MPIAIJ" 698 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 699 { 700 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 701 int ierr; 702 703 PetscFunctionBegin; 704 /* do nondiagonal part */ 705 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 706 /* send it on its way */ 707 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 708 /* do local part */ 709 ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 710 /* receive remote parts: note this assumes the values are not actually */ 711 /* inserted in yy until the next line, which is true for my implementation*/ 712 /* but is not perhaps always true. */ 713 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 714 PetscFunctionReturn(0); 715 } 716 717 /* 718 This only works correctly for square matrices where the subblock A->A is the 719 diagonal block 720 */ 721 #undef __FUNC__ 722 #define __FUNC__ "MatGetDiagonal_MPIAIJ" 723 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 724 { 725 int ierr; 726 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 727 728 PetscFunctionBegin; 729 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 730 if (a->rstart != a->cstart || a->rend != a->cend) { 731 SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition"); 732 } 733 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 734 PetscFunctionReturn(0); 735 } 736 737 #undef __FUNC__ 738 #define __FUNC__ "MatScale_MPIAIJ" 739 int MatScale_MPIAIJ(Scalar *aa,Mat A) 740 { 741 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 742 int ierr; 743 744 PetscFunctionBegin; 745 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 746 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 747 PetscFunctionReturn(0); 748 } 749 750 #undef __FUNC__ 751 #define __FUNC__ "MatDestroy_MPIAIJ" 752 int MatDestroy_MPIAIJ(Mat mat) 753 { 754 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 755 int ierr; 756 757 PetscFunctionBegin; 758 #if defined(USE_PETSC_LOG) 759 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N); 760 #endif 761 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 762 PetscFree(aij->rowners); 763 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 764 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 765 if (aij->colmap) PetscFree(aij->colmap); 766 if (aij->garray) PetscFree(aij->garray); 767 if (aij->lvec) VecDestroy(aij->lvec); 768 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 769 if (aij->rowvalues) PetscFree(aij->rowvalues); 770 PetscFree(aij); 771 PLogObjectDestroy(mat); 772 PetscHeaderDestroy(mat); 773 PetscFunctionReturn(0); 774 } 775 776 #undef __FUNC__ 777 #define __FUNC__ "MatView_MPIAIJ_Binary" 778 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 779 { 780 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 781 int ierr; 782 783 PetscFunctionBegin; 784 if (aij->size == 1) { 785 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 786 } 787 else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 788 PetscFunctionReturn(0); 789 } 790 791 #undef __FUNC__ 792 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" 793 extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 794 { 795 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 796 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 797 int ierr, format,shift = C->indexshift,rank; 798 FILE *fd; 799 ViewerType vtype; 800 801 PetscFunctionBegin; 802 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 803 if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 804 ierr = ViewerGetFormat(viewer,&format); 805 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 806 MatInfo info; 807 int flg; 808 MPI_Comm_rank(mat->comm,&rank); 809 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 810 ierr = MatGetInfo(mat,MAT_LOCAL,&info); 811 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 812 PetscSequentialPhaseBegin(mat->comm,1); 813 if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 814 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 815 else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 816 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 817 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info); 818 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 819 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); 820 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 821 fflush(fd); 822 PetscSequentialPhaseEnd(mat->comm,1); 823 ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 824 PetscFunctionReturn(0); 825 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 826 PetscFunctionReturn(0); 827 } 828 } 829 830 if (vtype == DRAW_VIEWER) { 831 Draw draw; 832 PetscTruth isnull; 833 ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 834 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 835 } 836 837 if (vtype == ASCII_FILE_VIEWER) { 838 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 839 PetscSequentialPhaseBegin(mat->comm,1); 840 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 841 aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 842 aij->cend); 843 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 844 ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 845 fflush(fd); 846 PetscSequentialPhaseEnd(mat->comm,1); 847 } else { 848 int size = aij->size; 849 rank = aij->rank; 850 if (size == 1) { 851 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 852 } else { 853 /* assemble the entire matrix onto first processor. */ 854 Mat A; 855 Mat_SeqAIJ *Aloc; 856 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 857 Scalar *a; 858 859 if (!rank) { 860 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 861 } else { 862 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 863 } 864 PLogObjectParent(mat,A); 865 866 /* copy over the A part */ 867 Aloc = (Mat_SeqAIJ*) aij->A->data; 868 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 869 row = aij->rstart; 870 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 871 for ( i=0; i<m; i++ ) { 872 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 873 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 874 } 875 aj = Aloc->j; 876 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 877 878 /* copy over the B part */ 879 Aloc = (Mat_SeqAIJ*) aij->B->data; 880 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 881 row = aij->rstart; 882 ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 883 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 884 for ( i=0; i<m; i++ ) { 885 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 886 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 887 } 888 PetscFree(ct); 889 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 890 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 891 /* 892 Everyone has to call to draw the matrix since the graphics waits are 893 synchronized across all processors that share the Draw object 894 */ 895 if (!rank || vtype == DRAW_VIEWER) { 896 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 897 } 898 ierr = MatDestroy(A); CHKERRQ(ierr); 899 } 900 } 901 PetscFunctionReturn(0); 902 } 903 904 #undef __FUNC__ 905 #define __FUNC__ "MatView_MPIAIJ" 906 int MatView_MPIAIJ(Mat mat,Viewer viewer) 907 { 908 int ierr; 909 ViewerType vtype; 910 911 PetscFunctionBegin; 912 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 913 if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 914 vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 915 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 916 } else if (vtype == BINARY_FILE_VIEWER) { 917 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 918 } else { 919 SETERRQ(1,1,"Viewer type not supported by PETSc object"); 920 } 921 PetscFunctionReturn(0); 922 } 923 924 /* 925 This has to provide several versions. 926 927 2) a) use only local smoothing updating outer values only once. 928 b) local smoothing updating outer values each inner iteration 929 3) color updating out values betwen colors. 930 */ 931 #undef __FUNC__ 932 #define __FUNC__ "MatRelax_MPIAIJ" 933 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 934 double fshift,int its,Vec xx) 935 { 936 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 937 Mat AA = mat->A, BB = mat->B; 938 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 939 Scalar *b,*x,*xs,*ls,d,*v,sum; 940 int ierr,*idx, *diag; 941 int n = mat->n, m = mat->m, i,shift = A->indexshift; 942 943 PetscFunctionBegin; 944 VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 945 xs = x + shift; /* shift by one for index start of 1 */ 946 ls = ls + shift; 947 if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);} 948 diag = A->diag; 949 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 950 if (flag & SOR_ZERO_INITIAL_GUESS) { 951 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 952 PetscFunctionReturn(0); 953 } 954 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 955 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 956 while (its--) { 957 /* go down through the rows */ 958 for ( i=0; i<m; i++ ) { 959 n = A->i[i+1] - A->i[i]; 960 idx = A->j + A->i[i] + shift; 961 v = A->a + A->i[i] + shift; 962 sum = b[i]; 963 SPARSEDENSEMDOT(sum,xs,v,idx,n); 964 d = fshift + A->a[diag[i]+shift]; 965 n = B->i[i+1] - B->i[i]; 966 idx = B->j + B->i[i] + shift; 967 v = B->a + B->i[i] + shift; 968 SPARSEDENSEMDOT(sum,ls,v,idx,n); 969 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 970 } 971 /* come up through the rows */ 972 for ( i=m-1; i>-1; i-- ) { 973 n = A->i[i+1] - A->i[i]; 974 idx = A->j + A->i[i] + shift; 975 v = A->a + A->i[i] + shift; 976 sum = b[i]; 977 SPARSEDENSEMDOT(sum,xs,v,idx,n); 978 d = fshift + A->a[diag[i]+shift]; 979 n = B->i[i+1] - B->i[i]; 980 idx = B->j + B->i[i] + shift; 981 v = B->a + B->i[i] + shift; 982 SPARSEDENSEMDOT(sum,ls,v,idx,n); 983 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 984 } 985 } 986 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 987 if (flag & SOR_ZERO_INITIAL_GUESS) { 988 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 989 PetscFunctionReturn(0); 990 } 991 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 992 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 993 while (its--) { 994 for ( i=0; i<m; i++ ) { 995 n = A->i[i+1] - A->i[i]; 996 idx = A->j + A->i[i] + shift; 997 v = A->a + A->i[i] + shift; 998 sum = b[i]; 999 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1000 d = fshift + A->a[diag[i]+shift]; 1001 n = B->i[i+1] - B->i[i]; 1002 idx = B->j + B->i[i] + shift; 1003 v = B->a + B->i[i] + shift; 1004 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1005 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1006 } 1007 } 1008 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1009 if (flag & SOR_ZERO_INITIAL_GUESS) { 1010 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 1011 PetscFunctionReturn(0); 1012 } 1013 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 1014 mat->Mvctx); CHKERRQ(ierr); 1015 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 1016 mat->Mvctx); CHKERRQ(ierr); 1017 while (its--) { 1018 for ( i=m-1; i>-1; i-- ) { 1019 n = A->i[i+1] - A->i[i]; 1020 idx = A->j + A->i[i] + shift; 1021 v = A->a + A->i[i] + shift; 1022 sum = b[i]; 1023 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1024 d = fshift + A->a[diag[i]+shift]; 1025 n = B->i[i+1] - B->i[i]; 1026 idx = B->j + B->i[i] + shift; 1027 v = B->a + B->i[i] + shift; 1028 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1029 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1030 } 1031 } 1032 } else { 1033 SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported"); 1034 } 1035 PetscFunctionReturn(0); 1036 } 1037 1038 #undef __FUNC__ 1039 #define __FUNC__ "MatGetInfo_MPIAIJ" 1040 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1041 { 1042 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1043 Mat A = mat->A, B = mat->B; 1044 int ierr; 1045 double isend[5], irecv[5]; 1046 1047 PetscFunctionBegin; 1048 info->block_size = 1.0; 1049 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1050 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1051 isend[3] = info->memory; isend[4] = info->mallocs; 1052 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1053 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1054 isend[3] += info->memory; isend[4] += info->mallocs; 1055 if (flag == MAT_LOCAL) { 1056 info->nz_used = isend[0]; 1057 info->nz_allocated = isend[1]; 1058 info->nz_unneeded = isend[2]; 1059 info->memory = isend[3]; 1060 info->mallocs = isend[4]; 1061 } else if (flag == MAT_GLOBAL_MAX) { 1062 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1063 info->nz_used = irecv[0]; 1064 info->nz_allocated = irecv[1]; 1065 info->nz_unneeded = irecv[2]; 1066 info->memory = irecv[3]; 1067 info->mallocs = irecv[4]; 1068 } else if (flag == MAT_GLOBAL_SUM) { 1069 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1070 info->nz_used = irecv[0]; 1071 info->nz_allocated = irecv[1]; 1072 info->nz_unneeded = irecv[2]; 1073 info->memory = irecv[3]; 1074 info->mallocs = irecv[4]; 1075 } 1076 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1077 info->fill_ratio_needed = 0; 1078 info->factor_mallocs = 0; 1079 info->rows_global = (double)mat->M; 1080 info->columns_global = (double)mat->N; 1081 info->rows_local = (double)mat->m; 1082 info->columns_local = (double)mat->N; 1083 1084 PetscFunctionReturn(0); 1085 } 1086 1087 #undef __FUNC__ 1088 #define __FUNC__ "MatSetOption_MPIAIJ" 1089 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1090 { 1091 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1092 1093 PetscFunctionBegin; 1094 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1095 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1096 op == MAT_COLUMNS_UNSORTED || 1097 op == MAT_COLUMNS_SORTED || 1098 op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1099 op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1100 MatSetOption(a->A,op); 1101 MatSetOption(a->B,op); 1102 } else if (op == MAT_ROW_ORIENTED) { 1103 a->roworiented = 1; 1104 MatSetOption(a->A,op); 1105 MatSetOption(a->B,op); 1106 } else if (op == MAT_ROWS_SORTED || 1107 op == MAT_ROWS_UNSORTED || 1108 op == MAT_SYMMETRIC || 1109 op == MAT_STRUCTURALLY_SYMMETRIC || 1110 op == MAT_YES_NEW_DIAGONALS) 1111 PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1112 else if (op == MAT_COLUMN_ORIENTED) { 1113 a->roworiented = 0; 1114 MatSetOption(a->A,op); 1115 MatSetOption(a->B,op); 1116 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1117 a->donotstash = 1; 1118 } else if (op == MAT_NO_NEW_DIAGONALS){ 1119 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1120 } else { 1121 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1122 } 1123 PetscFunctionReturn(0); 1124 } 1125 1126 #undef __FUNC__ 1127 #define __FUNC__ "MatGetSize_MPIAIJ" 1128 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1129 { 1130 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1131 1132 PetscFunctionBegin; 1133 if (m) *m = mat->M; 1134 if (n) *n = mat->N; 1135 PetscFunctionReturn(0); 1136 } 1137 1138 #undef __FUNC__ 1139 #define __FUNC__ "MatGetLocalSize_MPIAIJ" 1140 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1141 { 1142 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1143 1144 PetscFunctionBegin; 1145 if (m) *m = mat->m; 1146 if (n) *n = mat->n; 1147 PetscFunctionReturn(0); 1148 } 1149 1150 #undef __FUNC__ 1151 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" 1152 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1153 { 1154 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1155 1156 PetscFunctionBegin; 1157 *m = mat->rstart; *n = mat->rend; 1158 PetscFunctionReturn(0); 1159 } 1160 1161 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1162 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1163 1164 #undef __FUNC__ 1165 #define __FUNC__ "MatGetRow_MPIAIJ" 1166 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1167 { 1168 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1169 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1170 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1171 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1172 int *cmap, *idx_p; 1173 1174 PetscFunctionBegin; 1175 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1176 mat->getrowactive = PETSC_TRUE; 1177 1178 if (!mat->rowvalues && (idx || v)) { 1179 /* 1180 allocate enough space to hold information from the longest row. 1181 */ 1182 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1183 int max = 1,m = mat->m,tmp; 1184 for ( i=0; i<m; i++ ) { 1185 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1186 if (max < tmp) { max = tmp; } 1187 } 1188 mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar))); 1189 CHKPTRQ(mat->rowvalues); 1190 mat->rowindices = (int *) (mat->rowvalues + max); 1191 } 1192 1193 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows") 1194 lrow = row - rstart; 1195 1196 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1197 if (!v) {pvA = 0; pvB = 0;} 1198 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1199 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1200 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1201 nztot = nzA + nzB; 1202 1203 cmap = mat->garray; 1204 if (v || idx) { 1205 if (nztot) { 1206 /* Sort by increasing column numbers, assuming A and B already sorted */ 1207 int imark = -1; 1208 if (v) { 1209 *v = v_p = mat->rowvalues; 1210 for ( i=0; i<nzB; i++ ) { 1211 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1212 else break; 1213 } 1214 imark = i; 1215 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1216 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1217 } 1218 if (idx) { 1219 *idx = idx_p = mat->rowindices; 1220 if (imark > -1) { 1221 for ( i=0; i<imark; i++ ) { 1222 idx_p[i] = cmap[cworkB[i]]; 1223 } 1224 } else { 1225 for ( i=0; i<nzB; i++ ) { 1226 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1227 else break; 1228 } 1229 imark = i; 1230 } 1231 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 1232 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 1233 } 1234 } 1235 else { 1236 if (idx) *idx = 0; 1237 if (v) *v = 0; 1238 } 1239 } 1240 *nz = nztot; 1241 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1242 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1243 PetscFunctionReturn(0); 1244 } 1245 1246 #undef __FUNC__ 1247 #define __FUNC__ "MatRestoreRow_MPIAIJ" 1248 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1249 { 1250 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1251 1252 PetscFunctionBegin; 1253 if (aij->getrowactive == PETSC_FALSE) { 1254 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1255 } 1256 aij->getrowactive = PETSC_FALSE; 1257 PetscFunctionReturn(0); 1258 } 1259 1260 #undef __FUNC__ 1261 #define __FUNC__ "MatNorm_MPIAIJ" 1262 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1263 { 1264 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1265 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1266 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1267 double sum = 0.0; 1268 Scalar *v; 1269 1270 PetscFunctionBegin; 1271 if (aij->size == 1) { 1272 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1273 } else { 1274 if (type == NORM_FROBENIUS) { 1275 v = amat->a; 1276 for (i=0; i<amat->nz; i++ ) { 1277 #if defined(USE_PETSC_COMPLEX) 1278 sum += real(conj(*v)*(*v)); v++; 1279 #else 1280 sum += (*v)*(*v); v++; 1281 #endif 1282 } 1283 v = bmat->a; 1284 for (i=0; i<bmat->nz; i++ ) { 1285 #if defined(USE_PETSC_COMPLEX) 1286 sum += real(conj(*v)*(*v)); v++; 1287 #else 1288 sum += (*v)*(*v); v++; 1289 #endif 1290 } 1291 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1292 *norm = sqrt(*norm); 1293 } else if (type == NORM_1) { /* max column norm */ 1294 double *tmp, *tmp2; 1295 int *jj, *garray = aij->garray; 1296 tmp = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp); 1297 tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2); 1298 PetscMemzero(tmp,aij->N*sizeof(double)); 1299 *norm = 0.0; 1300 v = amat->a; jj = amat->j; 1301 for ( j=0; j<amat->nz; j++ ) { 1302 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1303 } 1304 v = bmat->a; jj = bmat->j; 1305 for ( j=0; j<bmat->nz; j++ ) { 1306 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1307 } 1308 ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1309 for ( j=0; j<aij->N; j++ ) { 1310 if (tmp2[j] > *norm) *norm = tmp2[j]; 1311 } 1312 PetscFree(tmp); PetscFree(tmp2); 1313 } else if (type == NORM_INFINITY) { /* max row norm */ 1314 double ntemp = 0.0; 1315 for ( j=0; j<amat->m; j++ ) { 1316 v = amat->a + amat->i[j] + shift; 1317 sum = 0.0; 1318 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1319 sum += PetscAbsScalar(*v); v++; 1320 } 1321 v = bmat->a + bmat->i[j] + shift; 1322 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1323 sum += PetscAbsScalar(*v); v++; 1324 } 1325 if (sum > ntemp) ntemp = sum; 1326 } 1327 ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr); 1328 } else { 1329 SETERRQ(PETSC_ERR_SUP,0,"No support for two norm"); 1330 } 1331 } 1332 PetscFunctionReturn(0); 1333 } 1334 1335 #undef __FUNC__ 1336 #define __FUNC__ "MatTranspose_MPIAIJ" 1337 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1338 { 1339 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1340 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1341 int ierr,shift = Aloc->indexshift; 1342 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1343 Mat B; 1344 Scalar *array; 1345 1346 PetscFunctionBegin; 1347 if (matout == PETSC_NULL && M != N) { 1348 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1349 } 1350 1351 ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1352 1353 /* copy over the A part */ 1354 Aloc = (Mat_SeqAIJ*) a->A->data; 1355 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1356 row = a->rstart; 1357 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1358 for ( i=0; i<m; i++ ) { 1359 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1360 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1361 } 1362 aj = Aloc->j; 1363 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1364 1365 /* copy over the B part */ 1366 Aloc = (Mat_SeqAIJ*) a->B->data; 1367 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1368 row = a->rstart; 1369 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1370 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1371 for ( i=0; i<m; i++ ) { 1372 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1373 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1374 } 1375 PetscFree(ct); 1376 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1377 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1378 if (matout != PETSC_NULL) { 1379 *matout = B; 1380 } else { 1381 PetscOps *Abops; 1382 struct _MatOps *Aops; 1383 1384 /* This isn't really an in-place transpose .... but free data structures from a */ 1385 PetscFree(a->rowners); 1386 ierr = MatDestroy(a->A); CHKERRQ(ierr); 1387 ierr = MatDestroy(a->B); CHKERRQ(ierr); 1388 if (a->colmap) PetscFree(a->colmap); 1389 if (a->garray) PetscFree(a->garray); 1390 if (a->lvec) VecDestroy(a->lvec); 1391 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1392 PetscFree(a); 1393 1394 /* 1395 This is horrible, horrible code. We need to keep the 1396 A pointers for the bops and ops but copy everything 1397 else from C. 1398 */ 1399 Abops = A->bops; 1400 Aops = A->ops; 1401 PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1402 A->bops = Abops; 1403 A->ops = Aops; 1404 PetscHeaderDestroy(B); 1405 } 1406 PetscFunctionReturn(0); 1407 } 1408 1409 #undef __FUNC__ 1410 #define __FUNC__ "MatDiagonalScale_MPIAIJ" 1411 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1412 { 1413 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1414 Mat a = aij->A, b = aij->B; 1415 int ierr,s1,s2,s3; 1416 1417 PetscFunctionBegin; 1418 ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr); 1419 if (rr) { 1420 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1421 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 1422 /* Overlap communication with computation. */ 1423 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1424 } 1425 if (ll) { 1426 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1427 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1428 ierr = (*b->ops->diagonalscale)(b,ll,0); CHKERRQ(ierr); 1429 } 1430 /* scale the diagonal block */ 1431 ierr = (*a->ops->diagonalscale)(a,ll,rr); CHKERRQ(ierr); 1432 1433 if (rr) { 1434 /* Do a scatter end and then right scale the off-diagonal block */ 1435 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1436 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr); 1437 } 1438 1439 PetscFunctionReturn(0); 1440 } 1441 1442 1443 extern int MatPrintHelp_SeqAIJ(Mat); 1444 #undef __FUNC__ 1445 #define __FUNC__ "MatPrintHelp_MPIAIJ" 1446 int MatPrintHelp_MPIAIJ(Mat A) 1447 { 1448 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1449 int ierr; 1450 1451 PetscFunctionBegin; 1452 if (!a->rank) { 1453 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1454 } 1455 PetscFunctionReturn(0); 1456 } 1457 1458 #undef __FUNC__ 1459 #define __FUNC__ "MatGetBlockSize_MPIAIJ" 1460 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1461 { 1462 PetscFunctionBegin; 1463 *bs = 1; 1464 PetscFunctionReturn(0); 1465 } 1466 #undef __FUNC__ 1467 #define __FUNC__ "MatSetUnfactored_MPIAIJ" 1468 int MatSetUnfactored_MPIAIJ(Mat A) 1469 { 1470 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1471 int ierr; 1472 1473 PetscFunctionBegin; 1474 ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1475 PetscFunctionReturn(0); 1476 } 1477 1478 #undef __FUNC__ 1479 #define __FUNC__ "MatEqual_MPIAIJ" 1480 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag) 1481 { 1482 Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data; 1483 Mat a, b, c, d; 1484 PetscTruth flg; 1485 int ierr; 1486 1487 PetscFunctionBegin; 1488 if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1489 a = matA->A; b = matA->B; 1490 c = matB->A; d = matB->B; 1491 1492 ierr = MatEqual(a, c, &flg); CHKERRQ(ierr); 1493 if (flg == PETSC_TRUE) { 1494 ierr = MatEqual(b, d, &flg); CHKERRQ(ierr); 1495 } 1496 ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr); 1497 PetscFunctionReturn(0); 1498 } 1499 1500 extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 1501 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1502 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1503 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **); 1504 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatGetSubMatrixCall,Mat *); 1505 1506 /* -------------------------------------------------------------------*/ 1507 static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 1508 MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 1509 MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 1510 MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1511 0,0, 1512 0,0, 1513 0,0, 1514 MatRelax_MPIAIJ, 1515 MatTranspose_MPIAIJ, 1516 MatGetInfo_MPIAIJ,MatEqual_MPIAIJ, 1517 MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ, 1518 MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 1519 0, 1520 MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 1521 0,0,0,0, 1522 MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 1523 0,0, 1524 0,0,MatConvertSameType_MPIAIJ,0,0, 1525 0,0,0, 1526 MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0, 1527 MatPrintHelp_MPIAIJ, 1528 MatScale_MPIAIJ,0,0,0, 1529 MatGetBlockSize_MPIAIJ,0,0,0,0, 1530 MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ, 1531 0,0,MatGetSubMatrix_MPIAIJ}; 1532 1533 1534 #undef __FUNC__ 1535 #define __FUNC__ "MatCreateMPIAIJ" 1536 /*@C 1537 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1538 (the default parallel PETSc format). For good matrix assembly performance 1539 the user should preallocate the matrix storage by setting the parameters 1540 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1541 performance can be increased by more than a factor of 50. 1542 1543 Collective on MPI_Comm 1544 1545 Input Parameters: 1546 + comm - MPI communicator 1547 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1548 This value should be the same as the local size used in creating the 1549 y vector for the matrix-vector product y = Ax. 1550 . n - This value should be the same as the local size used in creating the 1551 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 1552 calculated if N is given) For square matrices n is almost always m. 1553 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1554 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1555 . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1556 (same for all local rows) 1557 . d_nzz - array containing the number of nonzeros in the various rows of the 1558 diagonal portion of the local submatrix (possibly different for each row) 1559 or PETSC_NULL. You must leave room for the diagonal entry even if 1560 it is zero. 1561 . o_nz - number of nonzeros per row in the off-diagonal portion of local 1562 submatrix (same for all local rows). 1563 - o_nzz - array containing the number of nonzeros in the various rows of the 1564 off-diagonal portion of the local submatrix (possibly different for 1565 each row) or PETSC_NULL. 1566 1567 Output Parameter: 1568 . A - the matrix 1569 1570 Notes: 1571 The AIJ format (also called the Yale sparse matrix format or 1572 compressed row storage), is fully compatible with standard Fortran 77 1573 storage. That is, the stored row and column indices can begin at 1574 either one (as in Fortran) or zero. See the users manual for details. 1575 1576 The user MUST specify either the local or global matrix dimensions 1577 (possibly both). 1578 1579 By default, this format uses inodes (identical nodes) when possible. 1580 We search for consecutive rows with the same nonzero structure, thereby 1581 reusing matrix information to achieve increased efficiency. 1582 1583 Options Database Keys: 1584 + -mat_aij_no_inode - Do not use inodes 1585 . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5) 1586 - -mat_aij_oneindex - Internally use indexing starting at 1 1587 rather than 0. Note that when calling MatSetValues(), 1588 the user still MUST index entries starting at 0! 1589 1590 Storage Information: 1591 For a square global matrix we define each processor's diagonal portion 1592 to be its local rows and the corresponding columns (a square submatrix); 1593 each processor's off-diagonal portion encompasses the remainder of the 1594 local matrix (a rectangular submatrix). 1595 1596 The user can specify preallocated storage for the diagonal part of 1597 the local submatrix with either d_nz or d_nnz (not both). Set 1598 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1599 memory allocation. Likewise, specify preallocated storage for the 1600 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1601 1602 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1603 the figure below we depict these three local rows and all columns (0-11). 1604 1605 .vb 1606 0 1 2 3 4 5 6 7 8 9 10 11 1607 ------------------- 1608 row 3 | o o o d d d o o o o o o 1609 row 4 | o o o d d d o o o o o o 1610 row 5 | o o o d d d o o o o o o 1611 ------------------- 1612 .ve 1613 1614 Thus, any entries in the d locations are stored in the d (diagonal) 1615 submatrix, and any entries in the o locations are stored in the 1616 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1617 stored simply in the MATSEQAIJ format for compressed row storage. 1618 1619 Now d_nz should indicate the number of nonzeros per row in the d matrix, 1620 and o_nz should indicate the number of nonzeros per row in the o matrix. 1621 In general, for PDE problems in which most nonzeros are near the diagonal, 1622 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1623 or you will get TERRIBLE performance; see the users' manual chapter on 1624 matrices. 1625 1626 .keywords: matrix, aij, compressed row, sparse, parallel 1627 1628 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1629 @*/ 1630 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1631 { 1632 Mat B; 1633 Mat_MPIAIJ *b; 1634 int ierr, i,sum[2],work[2],size; 1635 1636 PetscFunctionBegin; 1637 MPI_Comm_size(comm,&size); 1638 if (size == 1) { 1639 if (M == PETSC_DECIDE) M = m; 1640 if (N == PETSC_DECIDE) N = n; 1641 ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 1642 PetscFunctionReturn(0); 1643 } 1644 1645 *A = 0; 1646 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,comm,MatDestroy,MatView); 1647 PLogObjectCreate(B); 1648 B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b); 1649 PetscMemzero(b,sizeof(Mat_MPIAIJ)); 1650 PetscMemcpy(B->ops,&MatOps,sizeof(struct _MatOps)); 1651 B->ops->destroy = MatDestroy_MPIAIJ; 1652 B->ops->view = MatView_MPIAIJ; 1653 B->factor = 0; 1654 B->assembled = PETSC_FALSE; 1655 B->mapping = 0; 1656 1657 B->insertmode = NOT_SET_VALUES; 1658 b->size = size; 1659 MPI_Comm_rank(comm,&b->rank); 1660 1661 if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 1662 SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1663 } 1664 1665 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1666 work[0] = m; work[1] = n; 1667 ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 1668 if (M == PETSC_DECIDE) M = sum[0]; 1669 if (N == PETSC_DECIDE) N = sum[1]; 1670 } 1671 if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);} 1672 if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);} 1673 b->m = m; B->m = m; 1674 b->n = n; B->n = n; 1675 b->N = N; B->N = N; 1676 b->M = M; B->M = M; 1677 1678 /* build local table of row and column ownerships */ 1679 b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1680 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1681 b->cowners = b->rowners + b->size + 2; 1682 ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1683 b->rowners[0] = 0; 1684 for ( i=2; i<=b->size; i++ ) { 1685 b->rowners[i] += b->rowners[i-1]; 1686 } 1687 b->rstart = b->rowners[b->rank]; 1688 b->rend = b->rowners[b->rank+1]; 1689 ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1690 b->cowners[0] = 0; 1691 for ( i=2; i<=b->size; i++ ) { 1692 b->cowners[i] += b->cowners[i-1]; 1693 } 1694 b->cstart = b->cowners[b->rank]; 1695 b->cend = b->cowners[b->rank+1]; 1696 1697 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1698 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 1699 PLogObjectParent(B,b->A); 1700 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1701 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 1702 PLogObjectParent(B,b->B); 1703 1704 /* build cache for off array entries formed */ 1705 ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 1706 b->donotstash = 0; 1707 b->colmap = 0; 1708 b->garray = 0; 1709 b->roworiented = 1; 1710 1711 /* stuff used for matrix vector multiply */ 1712 b->lvec = 0; 1713 b->Mvctx = 0; 1714 1715 /* stuff for MatGetRow() */ 1716 b->rowindices = 0; 1717 b->rowvalues = 0; 1718 b->getrowactive = PETSC_FALSE; 1719 1720 *A = B; 1721 PetscFunctionReturn(0); 1722 } 1723 1724 #undef __FUNC__ 1725 #define __FUNC__ "MatConvertSameType_MPIAIJ" 1726 int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1727 { 1728 Mat mat; 1729 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1730 int ierr, len=0, flg; 1731 1732 PetscFunctionBegin; 1733 *newmat = 0; 1734 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,matin->comm,MatDestroy,MatView); 1735 PLogObjectCreate(mat); 1736 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1737 PetscMemcpy(mat->ops,&MatOps,sizeof(struct _MatOps)); 1738 mat->ops->destroy = MatDestroy_MPIAIJ; 1739 mat->ops->view = MatView_MPIAIJ; 1740 mat->factor = matin->factor; 1741 mat->assembled = PETSC_TRUE; 1742 1743 a->m = mat->m = oldmat->m; 1744 a->n = mat->n = oldmat->n; 1745 a->M = mat->M = oldmat->M; 1746 a->N = mat->N = oldmat->N; 1747 1748 a->rstart = oldmat->rstart; 1749 a->rend = oldmat->rend; 1750 a->cstart = oldmat->cstart; 1751 a->cend = oldmat->cend; 1752 a->size = oldmat->size; 1753 a->rank = oldmat->rank; 1754 mat->insertmode = NOT_SET_VALUES; 1755 a->rowvalues = 0; 1756 a->getrowactive = PETSC_FALSE; 1757 1758 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1759 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1760 a->cowners = a->rowners + a->size + 2; 1761 PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1762 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1763 if (oldmat->colmap) { 1764 a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1765 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1766 PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1767 } else a->colmap = 0; 1768 if (oldmat->garray) { 1769 len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 1770 a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray); 1771 PLogObjectMemory(mat,len*sizeof(int)); 1772 if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1773 } else a->garray = 0; 1774 1775 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1776 PLogObjectParent(mat,a->lvec); 1777 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1778 PLogObjectParent(mat,a->Mvctx); 1779 ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1780 PLogObjectParent(mat,a->A); 1781 ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1782 PLogObjectParent(mat,a->B); 1783 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 1784 if (flg) { 1785 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1786 } 1787 *newmat = mat; 1788 PetscFunctionReturn(0); 1789 } 1790 1791 #include "sys.h" 1792 1793 #undef __FUNC__ 1794 #define __FUNC__ "MatLoad_MPIAIJ" 1795 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 1796 { 1797 Mat A; 1798 Scalar *vals,*svals; 1799 MPI_Comm comm = ((PetscObject)viewer)->comm; 1800 MPI_Status status; 1801 int i, nz, ierr, j,rstart, rend, fd; 1802 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1803 int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1804 int tag = ((PetscObject)viewer)->tag; 1805 1806 PetscFunctionBegin; 1807 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 1808 if (!rank) { 1809 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1810 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 1811 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 1812 if (header[3] < 0) { 1813 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ"); 1814 } 1815 } 1816 1817 1818 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1819 M = header[1]; N = header[2]; 1820 /* determine ownership of all rows */ 1821 m = M/size + ((M % size) > rank); 1822 rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1823 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1824 rowners[0] = 0; 1825 for ( i=2; i<=size; i++ ) { 1826 rowners[i] += rowners[i-1]; 1827 } 1828 rstart = rowners[rank]; 1829 rend = rowners[rank+1]; 1830 1831 /* distribute row lengths to all processors */ 1832 ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1833 offlens = ourlens + (rend-rstart); 1834 if (!rank) { 1835 rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 1836 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 1837 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 1838 for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1839 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1840 PetscFree(sndcounts); 1841 } else { 1842 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr); 1843 } 1844 1845 if (!rank) { 1846 /* calculate the number of nonzeros on each processor */ 1847 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1848 PetscMemzero(procsnz,size*sizeof(int)); 1849 for ( i=0; i<size; i++ ) { 1850 for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1851 procsnz[i] += rowlengths[j]; 1852 } 1853 } 1854 PetscFree(rowlengths); 1855 1856 /* determine max buffer needed and allocate it */ 1857 maxnz = 0; 1858 for ( i=0; i<size; i++ ) { 1859 maxnz = PetscMax(maxnz,procsnz[i]); 1860 } 1861 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1862 1863 /* read in my part of the matrix column indices */ 1864 nz = procsnz[0]; 1865 mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1866 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr); 1867 1868 /* read in every one elses and ship off */ 1869 for ( i=1; i<size; i++ ) { 1870 nz = procsnz[i]; 1871 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 1872 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 1873 } 1874 PetscFree(cols); 1875 } else { 1876 /* determine buffer space needed for message */ 1877 nz = 0; 1878 for ( i=0; i<m; i++ ) { 1879 nz += ourlens[i]; 1880 } 1881 mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1882 1883 /* receive message of column indices*/ 1884 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 1885 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 1886 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 1887 } 1888 1889 /* loop over local rows, determining number of off diagonal entries */ 1890 PetscMemzero(offlens,m*sizeof(int)); 1891 jj = 0; 1892 for ( i=0; i<m; i++ ) { 1893 for ( j=0; j<ourlens[i]; j++ ) { 1894 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1895 jj++; 1896 } 1897 } 1898 1899 /* create our matrix */ 1900 for ( i=0; i<m; i++ ) { 1901 ourlens[i] -= offlens[i]; 1902 } 1903 ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1904 A = *newmat; 1905 MatSetOption(A,MAT_COLUMNS_SORTED); 1906 for ( i=0; i<m; i++ ) { 1907 ourlens[i] += offlens[i]; 1908 } 1909 1910 if (!rank) { 1911 vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1912 1913 /* read in my part of the matrix numerical values */ 1914 nz = procsnz[0]; 1915 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 1916 1917 /* insert into matrix */ 1918 jj = rstart; 1919 smycols = mycols; 1920 svals = vals; 1921 for ( i=0; i<m; i++ ) { 1922 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1923 smycols += ourlens[i]; 1924 svals += ourlens[i]; 1925 jj++; 1926 } 1927 1928 /* read in other processors and ship out */ 1929 for ( i=1; i<size; i++ ) { 1930 nz = procsnz[i]; 1931 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 1932 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 1933 } 1934 PetscFree(procsnz); 1935 } else { 1936 /* receive numeric values */ 1937 vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1938 1939 /* receive message of values*/ 1940 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 1941 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1942 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 1943 1944 /* insert into matrix */ 1945 jj = rstart; 1946 smycols = mycols; 1947 svals = vals; 1948 for ( i=0; i<m; i++ ) { 1949 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1950 smycols += ourlens[i]; 1951 svals += ourlens[i]; 1952 jj++; 1953 } 1954 } 1955 PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1956 1957 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1958 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1959 PetscFunctionReturn(0); 1960 } 1961 1962 #undef __FUNC__ 1963 #define __FUNC__ "MatGetSubMatrix_MPIAIJ" 1964 /* 1965 Not great since it makes two copies of the submatrix, first an SeqAIJ 1966 in local and then by concatenating the local matrices the end result. 1967 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 1968 */ 1969 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatGetSubMatrixCall call,Mat *newmat) 1970 { 1971 int ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j; 1972 Mat *local,M, Mreuse; 1973 Scalar *vwork,*aa; 1974 MPI_Comm comm = mat->comm; 1975 Mat_SeqAIJ *aij; 1976 int *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend; 1977 1978 PetscFunctionBegin; 1979 MPI_Comm_rank(comm,&rank); 1980 MPI_Comm_size(comm,&size); 1981 1982 if (call == MAT_REUSE_MATRIX) { 1983 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 1984 if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse"); 1985 local = &Mreuse; 1986 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 1987 } else { 1988 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 1989 Mreuse = *local; 1990 PetscFree(local); 1991 } 1992 1993 /* 1994 m - number of local rows 1995 n - number of columns (same on all processors) 1996 rstart - first row in new global matrix generated 1997 */ 1998 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 1999 if (call == MAT_INITIAL_MATRIX) { 2000 aij = (Mat_SeqAIJ *) (Mreuse)->data; 2001 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 2002 ii = aij->i; 2003 jj = aij->j; 2004 2005 /* 2006 Determine the number of non-zeros in the diagonal and off-diagonal 2007 portions of the matrix in order to do correct preallocation 2008 */ 2009 2010 /* first get start and end of "diagonal" columns */ 2011 if (csize == PETSC_DECIDE) { 2012 nlocal = n/size + ((n % size) > rank); 2013 } else { 2014 nlocal = csize; 2015 } 2016 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2017 rstart = rend - nlocal; 2018 if (rank == size - 1 && rend != n) { 2019 SETERRQ(1,1,"Local column sizes do not add up to total number of columns"); 2020 } 2021 2022 /* next, compute all the lengths */ 2023 dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens); 2024 olens = dlens + m; 2025 for ( i=0; i<m; i++ ) { 2026 jend = ii[i+1] - ii[i]; 2027 olen = 0; 2028 dlen = 0; 2029 for ( j=0; j<jend; j++ ) { 2030 if ( *jj < rstart || *jj >= rend) olen++; 2031 else dlen++; 2032 jj++; 2033 } 2034 olens[i] = olen; 2035 dlens[i] = dlen; 2036 } 2037 ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 2038 PetscFree(dlens); 2039 } else { 2040 int ml,nl; 2041 2042 M = *newmat; 2043 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2044 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request"); 2045 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2046 /* 2047 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2048 rather than the slower MatSetValues(). 2049 */ 2050 M->was_assembled = PETSC_TRUE; 2051 M->assembled = PETSC_FALSE; 2052 } 2053 ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr); 2054 aij = (Mat_SeqAIJ *) (Mreuse)->data; 2055 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 2056 ii = aij->i; 2057 jj = aij->j; 2058 aa = aij->a; 2059 for (i=0; i<m; i++) { 2060 row = rstart + i; 2061 nz = ii[i+1] - ii[i]; 2062 cwork = jj; jj += nz; 2063 vwork = aa; aa += nz; 2064 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr); 2065 } 2066 2067 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2068 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2069 *newmat = M; 2070 2071 /* save submatrix used in processor for next request */ 2072 if (call == MAT_INITIAL_MATRIX) { 2073 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 2074 ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 2075 } 2076 2077 PetscFunctionReturn(0); 2078 } 2079