#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mpiaij.c,v 1.233 1998/03/16 18:55:15 bsmith Exp bsmith $";
#endif

#include "pinclude/pviewer.h"
#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

/* local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNC__
#define __FUNC__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data;
  int        n = B->n,i;

  PetscFunctionBegin;
  aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap);
  PLogObjectMemory(mat,aij->N*sizeof(int));
  PetscMemzero(aij->colmap,aij->N*sizeof(int));
  for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1;
  PetscFunctionReturn(0);
}
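/*
   Lookup sketch (illustrative, not part of the library): given a global
   column number gcol, the local off-diagonal column is recovered as

       int lcol = aij->colmap[gcol] - 1;   /* lcol < 0 => gcol not present */

   The i+1 stored above lets a zeroed colmap entry double as a
   "column not in the off-diagonal part" flag.
*/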
extern int DisAssemble_MPIAIJ(Mat);

#define CHUNKSIZE 15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for ( _i=0; _i<nrow; _i++ ) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \
      new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
      new_j = (int *) (new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \
      for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \
      len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                  len*sizeof(int)); \
      PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \
      PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                  len*sizeof(Scalar)); \
      /* free up old matrix storage */ \
 \
      PetscFree(a->a); \
      if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = 1; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for ( _i=0; _i<nrow; _i++ ) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \
      Scalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len   = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \
      new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \
      new_j = (int *) (new_a + new_nz); \
      new_i = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \
      for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \
      len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
      PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                  len*sizeof(int)); \
      PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \
      PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                  len*sizeof(Scalar)); \
      /* free up old matrix storage */ \
 \
      PetscFree(b->a); \
      if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = 1; \
 \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for ( ii=N; ii>=_i; ii-- ) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow; \
}
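/*
   Dispatch sketch (informal) for MatSetValues_MPIAIJ() below: for a row
   this process owns and a global column c,

       cstart <= c < cend  ->  diagonal block aij->A, local column c-cstart
       otherwise           ->  off-diagonal block aij->B (global column,
                               mapped through colmap once assembled)

   Entries for rows owned elsewhere are stashed and shipped at assembly time.
*/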
extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);

#undef __FUNC__
#define __FUNC__ "MatSetValues_MPIAIJ"
int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Scalar     value;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;
  int        roworiented = aij->roworiented;

  /* Some variables required by the macros */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
  int        *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j;
  Scalar     *aa = a->a;

  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data;
  int        *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j;
  Scalar     *ba = b->a;

  int        *rp,ii,nrow,_i,rmax, N, col1,low,high,t;
  int        nonew = a->nonew,shift = a->indexshift;
  Scalar     *ap;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
#if defined(USE_PETSC_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
#if defined(USE_PETSC_BOPT_g)
        else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");}
        else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
            col = aij->colmap[in[j]] - 1;
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *) B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba    = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (roworiented && !aij->donotstash) {
        ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr);
      } else {
        if (!aij->donotstash) {
          row = im[i];
          for ( j=0; j<n; j++ ) {
            ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr);
          }
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetValues_MPIAIJ"
int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr,i,j, rstart = aij->rstart, rend = aij->rend;
  int        cstart = aij->cstart, cend = aij->cend,row,col;

  PetscFunctionBegin;
  for ( i=0; i<m; i++ ) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for ( j=0; j<n; j++ ) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
          col = aij->colmap[idxn[j]] - 1;
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
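/*
   Consistency-check sketch used in MatAssemblyBegin_MPIAIJ() below: the
   insert modes are OR-reduced across the communicator, so a mix of
   ADD_VALUES and INSERT_VALUES shows up as the combined value
   (ADD_VALUES|INSERT_VALUES), which neither mode equals by itself
   (assuming the two enum values occupy distinct bits).
*/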
#undef __FUNC__
#define __FUNC__ "MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Comm    comm = mat->comm;
  int         size = aij->size, *owners = aij->rowners;
  int         rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr;
  MPI_Request *send_waits,*recv_waits;
  int         *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work;
  InsertMode  addv;
  Scalar      *rvalues,*svalues;

  PetscFunctionBegin;
  /* make sure all processors are in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner  = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner);
  for ( i=0; i<aij->stash.n; i++ ) {
    idx = aij->stash.idx[i];
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work      = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  ierr      = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nreceives = work[rank];
  ierr      = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax      = work[rank];
  PetscFree(work);

  /* post receives:
       1) each message will consist of ordered triples
          (global row, global column, value); we store the indices as
          Scalars to simplify the message passing.
       2) since we don't know how long each individual message is we
          allocate the largest needed buffer for each receive. Potentially
          this is a lot of wasted space.

     This could be done better.
  */
  rvalues    = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nreceives; i++ ) {
    ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag,
                     comm,recv_waits+i);CHKERRQ(ierr);
  }
  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  svalues    = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts);
  starts[0]  = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<aij->stash.n; i++ ) {
    svalues[3*starts[owner[i]]]       = (Scalar) aij->stash.idx[i];
    svalues[3*starts[owner[i]]+1]     = (Scalar) aij->stash.idy[i];
    svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i];
  }
  PetscFree(owner);
  starts[0] = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts); PetscFree(nprocs);

  /* Free cache space */
  PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n);
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);

  aij->svalues    = svalues;    aij->rvalues    = rvalues;
  aij->nsends     = nsends;     aij->nrecvs     = nreceives;
  aij->send_waits = send_waits; aij->recv_waits = recv_waits;
  aij->rmax       = nmax;

  PetscFunctionReturn(0);
}
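/*
   Typical call pattern (an informal sketch of caller code, not part of
   this file): after all MatSetValues() calls have been made,

       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);   CHKERRQ(ierr);

   MatAssemblyBegin_MPIAIJ() above posts the sends/receives for the stashed
   off-processor values; MatAssemblyEnd_MPIAIJ() below drains them and then
   assembles the two sequential blocks.
*/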
extern int MatSetUpMultiply_MPIAIJ(Mat);

#undef __FUNC__
#define __FUNC__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *) mat->data;
  MPI_Status  *send_status,recv_status;
  int         imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr;
  int         row,col,other_disassembled;
  Scalar      *values,val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  /* wait on receives */
  while (count) {
    ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    values = aij->rvalues + 3*imdex*aij->rmax;
    ierr   = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr);
    n      = n/3;
    for ( i=0; i<n; i++ ) {
      row = (int) PetscReal(values[3*i]) - aij->rstart;
      col = (int) PetscReal(values[3*i+1]);
      val = values[3*i+2];
      if (col >= aij->cstart && col < aij->cend) {
        col -= aij->cstart;
        ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
      } else {
        if (mat->was_assembled || mat->assembled) {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr);
          }
          col = aij->colmap[col] - 1;
          if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
            ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
            col  = (int) PetscReal(values[3*i+1]);
          }
        }
        ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr);
      }
    }
    count--;
  }
  PetscFree(aij->recv_waits); PetscFree(aij->rvalues);

  /* wait on sends */
  if (aij->nsends) {
    send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  PetscFree(aij->send_waits); PetscFree(aij->svalues);

  ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
  if (mat->was_assembled && !other_disassembled) {
    ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr);
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr);

  if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;}
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatZeroEntries_MPIAIJ"
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A); CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
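/*
   Communication sketch for MatZeroRows_MPIAIJ() below: each process scans
   the requested global rows, forwards each index to its owning process
   (the same count/Irecv/Isend pattern as assembly), and the owner zeroes
   the row in both local blocks; if diag is given, the diagonal entries
   are then reinserted with MatSetValues() plus a final assembly.
*/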
#undef __FUNC__
#define __FUNC__ "MatZeroRows_MPIAIJ"
int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ *) A->data;
  int            i,ierr,N, *rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,rstart=l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N); CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows); CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs);
  PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size;
  owner  = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note */
  for ( i=0; i<N; i++ ) {
    idx   = rows[i];
    found = 0;
    for ( j=0; j<size; j++ ) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for ( i=0; i<size; i++ ) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  work   = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work);
  ierr   = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
  nrecvs = work[rank];
  ierr   = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
  nmax   = work[rank];
  PetscFree(work);

  /* post receives: */
  rvalues    = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for ( i=0; i<nrecvs; i++ ) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
       1) starts[i] gives the starting index in svalues for stuff going to
          the ith processor
  */
  svalues    = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues);
  send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts);
  starts[0]  = 0;
  for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  for ( i=0; i<N; i++ ) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ISRestoreIndices(is,&rows);

  starts[0] = 0;
  for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for ( i=0; i<size; i++ ) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  PetscFree(starts);

  base = owners[rank];

  /* wait on receives */
  lens   = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen += n;
    count--;
  }
  PetscFree(recv_waits);

  /* move the data into the send scatter */
  lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows);
  count = 0;
  for ( i=0; i<nrecvs; i++ ) {
    values = rvalues + i*nmax;
    for ( j=0; j<lens[i]; j++ ) {
      lrows[count++] = values[j] - base;
    }
  }
  PetscFree(rvalues); PetscFree(lens);
  PetscFree(owner); PetscFree(nprocs);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr);
  ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr);
  ierr = ISDestroy(istmp); CHKERRQ(ierr);

  if (diag) {
    for ( i = 0; i < slen; i++ ) {
      row = lrows[i] + rstart;
      MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);
    }
    MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
  }
  PetscFree(lrows);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    PetscFree(send_status);
  }
  PetscFree(send_waits); PetscFree(svalues);

  PetscFunctionReturn(0);
}
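/*
   Mat-vec sketch (informal) for the routines below: with A the diagonal
   block and B the off-diagonal block acting on ghost values gathered
   into lvec,

       y = A*x_local + B*lvec

   The scatter filling lvec is started before the local multiply so that
   communication overlaps computation.
*/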
#undef __FUNC__
#define __FUNC__ "MatMult_MPIAIJ"
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr);
  if (nt != a->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultAdd_MPIAIJ"
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTrans_MPIAIJ"
int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted into yy until VecScatterEnd() below, which holds for this
     implementation but is perhaps not always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatMultTransAdd_MPIAIJ"
int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted into zz until VecScatterEnd() below, which holds for this
     implementation but is perhaps not always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
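/*
   Transpose mat-vec sketch for the two routines above: locally

       yy (or zz) = A^T * x_local,

   while B^T * x is scattered in reverse mode with ADD_VALUES, so each
   ghost contribution is accumulated back onto its owning process.
*/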
/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNC__
#define __FUNC__ "MatGetDiagonal_MPIAIJ"
int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  PetscFunctionBegin;
  if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatScale_MPIAIJ"
int MatScale_MPIAIJ(Scalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A); CHKERRQ(ierr);
  ierr = MatScale(aa,a->B); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDestroy_MPIAIJ"
int MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  PetscFunctionBegin;
#if defined(USE_PETSC_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N);
#endif
  ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr);
  PetscFree(aij->rowners);
  ierr = MatDestroy(aij->A); CHKERRQ(ierr);
  ierr = MatDestroy(aij->B); CHKERRQ(ierr);
  if (aij->colmap) PetscFree(aij->colmap);
  if (aij->garray) PetscFree(aij->garray);
  if (aij->lvec)   VecDestroy(aij->lvec);
  if (aij->Mvctx)  VecScatterDestroy(aij->Mvctx);
  if (aij->rowvalues) PetscFree(aij->rowvalues);
  PetscFree(aij);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ_Binary"
extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  int        ierr;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
  } else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported");
  PetscFunctionReturn(0);
}
#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab"
extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *C = (Mat_SeqAIJ*)aij->A->data;
  int        ierr, format,shift = C->indexshift,rank;
  FILE       *fd;
  ViewerType vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerGetFormat(viewer,&format);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      MatInfo info;
      int     flg;
      MPI_Comm_rank(mat->comm,&rank);
      ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);
      ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr);
      PetscSequentialPhaseBegin(mat->comm,1);
      if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
                       rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
                   rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);
      fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);
      fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
      fflush(fd);
      PetscSequentialPhaseEnd(mat->comm,1);
      ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (vtype == DRAW_VIEWER) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (vtype == ASCII_FILE_VIEWER) {
    ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr);
    PetscSequentialPhaseBegin(mat->comm,1);
    fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n",
            aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart,
            aij->cend);
    ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    ierr = MatView(aij->B,viewer); CHKERRQ(ierr);
    fflush(fd);
    PetscSequentialPhaseEnd(mat->comm,1);
  } else {
    int size = aij->size;
    rank = aij->rank;
    if (size == 1) {
      ierr = MatView(aij->A,viewer); CHKERRQ(ierr);
    } else {
      /* assemble the entire matrix onto first processor. */
      Mat        A;
      Mat_SeqAIJ *Aloc;
      int        M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct;
      Scalar     *a;

      if (!rank) {
        ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
      } else {
        ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
      }
      PLogObjectParent(mat,A);

      /* copy over the A part */
      Aloc = (Mat_SeqAIJ*) aij->A->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
      }
      aj = Aloc->j;
      for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;}

      /* copy over the B part */
      Aloc = (Mat_SeqAIJ*) aij->B->data;
      m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
      row = aij->rstart;
      ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols);
      for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];}
      for ( i=0; i<m; i++ ) {
        ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
        row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
      }
      PetscFree(ct);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
      /*
         Everyone has to call to draw the matrix since the graphics waits are
         synchronized across all processors that share the Draw object
      */
      if (!rank || vtype == DRAW_VIEWER) {
        ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr);
      }
      ierr = MatDestroy(A); CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatView_MPIAIJ"
int MatView_MPIAIJ(Mat mat,Viewer viewer)
{
  int        ierr;
  ViewerType vtype;

  PetscFunctionBegin;
  ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr);
  if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER ||
      vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) {
    ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr);
  } else if (vtype == BINARY_FILE_VIEWER) {
    ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*
   This has to provide several versions.

     1) a) use only local smoothing, updating outer values only once
        b) local smoothing, updating outer values each inner iteration
     2) color, updating outer values between colors
*/
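/*
   Per-row update applied below (a sketch, assuming SPARSEDENSEMDOT(sum,x,v,idx,n)
   performs sum -= v[k]*x[idx[k]] over the row):

       x_i <- (1-omega) x_i + omega (b_i - sum_{j != i} a_ij x_j) / (a_ii + fshift)

   The full row dot product is subtracted from b_i, so the diagonal term
   a_ii*x_i is added back before dividing by d = a_ii + fshift.
*/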
#undef __FUNC__
#define __FUNC__ "MatRelax_MPIAIJ"
int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag,
                    double fshift,int its,Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        AA = mat->A, BB = mat->B;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data;
  Scalar     *b,*x,*xs,*ls,d,*v,sum;
  int        ierr,*idx, *diag;
  int        n = mat->n, m = mat->m, i,shift = A->indexshift;

  PetscFunctionBegin;
  VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls);
  xs = x + shift; /* shift by one for index start of 1 */
  ls = ls + shift;
  if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);}
  diag = A->diag;
  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    while (its--) {
      /* go down through the rows */
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
      /* come up through the rows */
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
    while (its--) {
      for ( i=0; i<m; i++ ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
                           mat->Mvctx); CHKERRQ(ierr);
    ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,
                         mat->Mvctx); CHKERRQ(ierr);
    while (its--) {
      for ( i=m-1; i>-1; i-- ) {
        n    = A->i[i+1] - A->i[i];
        idx  = A->j + A->i[i] + shift;
        v    = A->a + A->i[i] + shift;
        sum  = b[i];
        SPARSEDENSEMDOT(sum,xs,v,idx,n);
        d    = fshift + A->a[diag[i]+shift];
        n    = B->i[i+1] - B->i[i];
        idx  = B->j + B->i[i] + shift;
        v    = B->a + B->i[i] + shift;
        SPARSEDENSEMDOT(sum,ls,v,idx,n);
        x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d;
      }
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported");
  }
  PetscFunctionReturn(0);
}
#undef __FUNC__
#define __FUNC__ "MatGetInfo_MPIAIJ"
int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Mat        A = mat->A, B = mat->B;
  int        ierr;
  double     isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)mat->M;
  info->columns_global    = (double)mat->N;
  info->rows_local        = (double)mat->m;
  info->columns_local     = (double)mat->n;

  PetscFunctionReturn(0);
}
#undef __FUNC__
#define __FUNC__ "MatSetOption_MPIAIJ"
int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;

  PetscFunctionBegin;
  if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
      op == MAT_YES_NEW_NONZERO_LOCATIONS ||
      op == MAT_COLUMNS_UNSORTED ||
      op == MAT_COLUMNS_SORTED ||
      op == MAT_NEW_NONZERO_ALLOCATION_ERROR ||
      op == MAT_NEW_NONZERO_LOCATION_ERROR) {
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_ROW_ORIENTED) {
    a->roworiented = 1;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_ROWS_SORTED ||
             op == MAT_ROWS_UNSORTED ||
             op == MAT_SYMMETRIC ||
             op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS) {
    PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
  } else if (op == MAT_COLUMN_ORIENTED) {
    a->roworiented = 0;
    MatSetOption(a->A,op);
    MatSetOption(a->B,op);
  } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
    a->donotstash = 1;
  } else if (op == MAT_NO_NEW_DIAGONALS){
    SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
  } else {
    SETERRQ(PETSC_ERR_SUP,0,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSize_MPIAIJ"
int MatGetSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->M;
  if (n) *n = mat->N;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetLocalSize_MPIAIJ"
int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;

  PetscFunctionBegin;
  if (m) *m = mat->m;
  if (n) *n = mat->n;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetOwnershipRange_MPIAIJ"
int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;

  PetscFunctionBegin;
  *m = mat->rstart; *n = mat->rend;
  PetscFunctionReturn(0);
}

extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**);
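/*
   Row layout sketch for MatGetRow_MPIAIJ() below: a requested local row is
   returned as three runs already in increasing global column order --
   off-diagonal entries with global column < cstart, then the diagonal-block
   entries (local column + cstart), then the remaining off-diagonal entries
   (mapped through garray).
*/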
#undef __FUNC__
#define __FUNC__ "MatGetRow_MPIAIJ"
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data;
  Scalar     *vworkA, *vworkB, **pvA, **pvB,*v_p;
  int        i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart;
  int        nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend;
  int        *cmap, *idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data;
    int        max = 1,m = mat->m,tmp;
    for ( i=0; i<m; i++ ) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar)));
    CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int *) (mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for ( i=0; i<nzB; i++ ) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for ( i=0; i<nzA; i++ )     v_p[imark+i] = vworkA[i];
        for ( i=imark; i<nzB; i++ ) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for ( i=0; i<imark; i++ ) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for ( i=0; i<nzB; i++ ) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for ( i=0; i<nzA; i++ )     idx_p[imark+i] = cstart + cworkA[i];
        for ( i=imark; i<nzB; i++ ) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatRestoreRow_MPIAIJ"
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}
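/*
   Norm sketch for MatNorm_MPIAIJ() below: NORM_FROBENIUS sums |a_ij|^2 over
   both blocks and Allreduces the total; NORM_1 (max column sum) accumulates
   per-global-column absolute sums in a length-N work array (non-scalable,
   like colmap); NORM_INFINITY (max row sum) needs only a max-reduce of the
   local row sums.
*/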
#undef __FUNC__
#define __FUNC__ "MatNorm_MPIAIJ"
int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data;
  int        ierr, i, j, cstart = aij->cstart,shift = amat->indexshift;
  double     sum = 0.0;
  Scalar     *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++ ) {
#if defined(USE_PETSC_COMPLEX)
        sum += real(conj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr  = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      double *tmp, *tmp2;
      int    *jj, *garray = aij->garray;
      tmp  = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp);
      tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2);
      PetscMemzero(tmp,aij->N*sizeof(double));
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for ( j=0; j<amat->nz; j++ ) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for ( j=0; j<bmat->nz; j++ ) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for ( j=0; j<aij->N; j++ ) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscFree(tmp); PetscFree(tmp2);
    } else if (type == NORM_INFINITY) { /* max row norm */
      double ntemp = 0.0;
      for ( j=0; j<amat->m; j++ ) {
        v   = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
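/*
   Transpose sketch for MatTranspose_MPIAIJ() below: each locally owned row
   is re-inserted into the new matrix as a column (note the swapped row and
   column arguments in the MatSetValues() calls), so all communication is
   handled by the ordinary assembly path.
*/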
#undef __FUNC__
#define __FUNC__ "MatTranspose_MPIAIJ"
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data;
  Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data;
  int        ierr,shift = Aloc->indexshift;
  int        M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat        B;
  Scalar     *array;

  PetscFunctionBegin;
  if (matout == PETSC_NULL && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
  }

  ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*) a->A->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*) a->B->data;
  m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols);
  for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];}
  for ( i=0; i<m; i++ ) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  PetscFree(ct);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  if (matout != PETSC_NULL) {
    *matout = B;
  } else {
    PetscOps       *Abops;
    struct _MatOps *Aops;

    /* This isn't really an in-place transpose, but we do free the data
       structures from a */
    PetscFree(a->rowners);
    ierr = MatDestroy(a->A); CHKERRQ(ierr);
    ierr = MatDestroy(a->B); CHKERRQ(ierr);
    if (a->colmap) PetscFree(a->colmap);
    if (a->garray) PetscFree(a->garray);
    if (a->lvec) VecDestroy(a->lvec);
    if (a->Mvctx) VecScatterDestroy(a->Mvctx);
    PetscFree(a);

    /*
       This is horrible, horrible code. We need to keep the
       A pointers for the bops and ops but copy everything
       else from B.
    */
    Abops = A->bops;
    Aops  = A->ops;
    PetscMemcpy(A,B,sizeof(struct _p_Mat));
    A->bops = Abops;
    A->ops  = Aops;
    PetscHeaderDestroy(B);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatDiagonalScale_MPIAIJ"
int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data;
  Mat        a = aij->A, b = aij->B;
  int        ierr,s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0); CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr); CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

extern int MatPrintHelp_SeqAIJ(Mat);

#undef __FUNC__
#define __FUNC__ "MatPrintHelp_MPIAIJ"
int MatPrintHelp_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
  int        ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetBlockSize_MPIAIJ"
int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
{
  PetscFunctionBegin;
  *bs = 1;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatSetUnfactored_MPIAIJ"
int MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatEqual_MPIAIJ"
int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data;
  Mat        a, b, c, d;
  PetscTruth flg;
  int        ierr;

  PetscFunctionBegin;
  if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a, c, &flg); CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b, d, &flg); CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int);
extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatGetSubMatrixCall,Mat *);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       0,0,
       0,0,
       0,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       0,0,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       0,0,
       0,0,MatConvertSameType_MPIAIJ,0,0,
       0,0,0,
       MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,0,0,0,
       MatGetBlockSize_MPIAIJ,0,0,0,0,
       MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ,
       0,0,MatGetSubMatrix_MPIAIJ};

#undef __FUNC__
#define __FUNC__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
.  d_nz - number of nonzeros per row in diagonal portion of local submatrix
          (same for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           diagonal portion of the local submatrix (possibly different for each row)
           or PETSC_NULL. You must leave room for the diagonal entry even if
           it is zero.
.  o_nz - number of nonzeros per row in the off-diagonal portion of local
          submatrix (same for all local rows).
.  o_nnz - array containing the number of nonzeros in the various rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage), is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.
   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
$    -mat_aij_no_inode  - Do not use inodes
$    -mat_aij_inode_limit <limit> - Set inode limit.
$        (max limit=5)
$    -mat_aij_oneindex - Internally use indexing starting at 1
$        rather than 0.  Note: When calling MatSetValues(),
$        the user still MUST index entries starting at 0!

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

$            0 1 2 3 4 5 6 7 8 9 10 11
$           -------------------
$    row 3  | o o o d d d o o o o o o
$    row 4  | o o o d d d o o o o o o
$    row 5  | o o o d d d o o o o o o
$           -------------------

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQAIJ format for compressed row storage.

   Now d_nz should indicate the number of nonzeros per row in the d matrix,
   and o_nz should indicate the number of nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
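   Example (an illustrative sketch, not taken from the original manual page):
   for a 12x12 matrix distributed over 4 processors with 3 rows each, laid
   out as in the figure above, one might preallocate 3 diagonal-block and
   up to 9 off-diagonal-block nonzeros per row with

$      Mat A;
$      ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,12,12,3,PETSC_NULL,
$                             9,PETSC_NULL,&A); CHKERRQ(ierr);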

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat        B;
  Mat_MPIAIJ *b;
  int        ierr, i, sum[2], work[2], size;

  PetscFunctionBegin;
  MPI_Comm_size(comm,&size);
  if (size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIAIJ));
  PetscMemcpy(B->ops,&MatOps,sizeof(struct _MatOps));
  B->ops->destroy = MatDestroy_MPIAIJ;
  B->ops->view    = MatView_MPIAIJ;
  B->factor       = 0;
  B->assembled    = PETSC_FALSE;
  B->mapping      = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size = size;
  MPI_Comm_rank(comm,&b->rank);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    ierr = MPI_Allreduce(work,sum,2,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
  if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  /* build local table of row and column ownerships; after the prefix sums
     below, rowners[i] is the first global row owned by processor i, and
     cowners[i] the corresponding first column */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  *A = B;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatConvertSameType_MPIAIJ"
int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat        mat;
  Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len = 0, flg;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,matin->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->ops->destroy = MatDestroy_MPIAIJ;
  mat->ops->view    = MatView_MPIAIJ;
  mat->factor       = matin->factor;
  mat->assembled    = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;

  a->rstart = oldmat->rstart;
  a->rend   = oldmat->rend;
  a->cstart = oldmat->cstart;
  a->cend   = oldmat->cend;
  a->size   = oldmat->size;
  a->rank   = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 2;
  PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int)); CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
  } else a->colmap = 0;
  if (oldmat->garray) {
    len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
    a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIAIJ"
int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat        A;
  Scalar     *vals, *svals;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        i, nz, ierr, j, rstart, rend, fd;
  int        header[4], rank, size, *rowlengths = 0, M, N, m, *rowners, maxnz, *cols;
  int        *ourlens, *sndcounts = 0, *procsnz = 0, *offlens, jj, *mycols, *smycols;
  int        tag = ((PetscObject)viewer)->tag;

  PetscFunctionBegin;
  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
    }
  }

  /* the header was read only on processor 0; broadcast the global sizes */
  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m       = M/size + ((M % size) > rank);
  rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  ierr    = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    PetscFree(sndcounts);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j<rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    PetscFree(cols);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix, preallocating with the diagonal (ourlens minus offlens)
     and off-diagonal (offlens) row lengths just computed */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED); CHKERRQ(ierr);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' parts and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    PetscFree(procsnz);
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ
  copy of the local rows (in *local), and then the final result formed by
  concatenating the local matrices.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatGetSubMatrixCall call,Mat *newmat)
{
  int        ierr, i, m, n, rstart, row, rend, nz, *cwork, size, rank, j;
  Mat        *local, M;
  Scalar     *vwork, *aa;
  MPI_Comm   comm = mat->comm;
  Mat_SeqAIJ *aij;
  int        *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend;

  PetscFunctionBegin;
  MPI_Comm_rank(comm,&rank);
  MPI_Comm_size(comm,&size);

  ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(*local,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *) (*local)->data;
    if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
    ii = aij->i;
    jj = aij->j;

    /*
        Determine the number of nonzeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      nlocal = n/size + ((n % size) > rank);
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
    }

    /* next, compute all the lengths */
    dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) ); CHKPTRQ(dlens);
    olens = dlens + m;
    for ( i=0; i<m; i++ ) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for ( j=0; j<jend; j++ ) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
    PetscFree(dlens);
  } else {
    int ml, nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ *) (*local)->data;
  if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
  ii = aij->i;
  jj = aij->j;
  aa = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr);
  }

  ierr = MatDestroyMatrices(1,&local);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  *newmat = M;
  PetscFunctionReturn(0);
}
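
/*
   Illustrative sketch: MatLoad_MPIAIJ() above is normally reached through
   MatLoad() on a binary viewer.  The viewer routines named below
   (ViewerFileOpenBinary() with BINARY_RDONLY) are assumed from the PETSc 2.0.x
   interface and should be verified against the local headers.

      Viewer viewer;
      Mat    A;
      int    ierr;

      ierr = ViewerFileOpenBinary(PETSC_COMM_WORLD,"matrix.dat",BINARY_RDONLY,&viewer);
      ierr = MatLoad(viewer,MATMPIAIJ,&A); CHKERRQ(ierr);
      ierr = ViewerDestroy(viewer); CHKERRQ(ierr);
*/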