1 2 #ifdef PETSC_RCS_HEADER 3 static char vcid[] = "$Id: mpiaij.c,v 1.232 1998/03/12 23:18:35 bsmith Exp bsmith $"; 4 #endif 5 6 #include "pinclude/pviewer.h" 7 #include "src/mat/impls/aij/mpi/mpiaij.h" 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 11 /* local utility routine that creates a mapping from the global column 12 number to the local number in the off-diagonal part of the local 13 storage of the matrix. This is done in a non scable way since the 14 length of colmap equals the global matrix length. 15 */ 16 #undef __FUNC__ 17 #define __FUNC__ "CreateColmap_MPIAIJ_Private" 18 int CreateColmap_MPIAIJ_Private(Mat mat) 19 { 20 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 21 Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 22 int n = B->n,i; 23 24 PetscFunctionBegin; 25 aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap); 26 PLogObjectMemory(mat,aij->N*sizeof(int)); 27 PetscMemzero(aij->colmap,aij->N*sizeof(int)); 28 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 29 PetscFunctionReturn(0); 30 } 31 32 extern int DisAssemble_MPIAIJ(Mat); 33 34 #define CHUNKSIZE 15 35 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 36 { \ 37 \ 38 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 39 rmax = aimax[row]; nrow = ailen[row]; \ 40 col1 = col - shift; \ 41 \ 42 low = 0; high = nrow; \ 43 while (high-low > 5) { \ 44 t = (low+high)/2; \ 45 if (rp[t] > col) high = t; \ 46 else low = t; \ 47 } \ 48 for ( _i=0; _i<nrow; _i++ ) { \ 49 if (rp[_i] > col1) break; \ 50 if (rp[_i] == col1) { \ 51 if (addv == ADD_VALUES) ap[_i] += value; \ 52 else ap[_i] = value; \ 53 goto a_noinsert; \ 54 } \ 55 } \ 56 if (nonew == 1) goto a_noinsert; \ 57 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 58 if (nrow >= rmax) { \ 59 /* there is no extra room in row, therefore enlarge */ \ 60 int new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \ 61 Scalar *new_a; \ 62 \ 63 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 64 \ 65 /* malloc new storage space */ \ 66 len = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \ 67 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 68 new_j = (int *) (new_a + new_nz); \ 69 new_i = new_j + new_nz; \ 70 \ 71 /* copy over old data into new slots */ \ 72 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \ 73 for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 74 PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \ 75 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 76 PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 77 len*sizeof(int)); \ 78 PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \ 79 PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 80 len*sizeof(Scalar)); \ 81 /* free up old matrix storage */ \ 82 \ 83 PetscFree(a->a); \ 84 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 85 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 86 a->singlemalloc = 1; \ 87 \ 88 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 89 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 90 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 91 a->maxnz += CHUNKSIZE; \ 92 a->reallocs++; \ 93 } \ 94 N = nrow++ - 1; a->nz++; \ 95 /* shift up all the later entries in this row */ \ 96 for ( ii=N; ii>=_i; ii-- ) { \ 97 rp[ii+1] = rp[ii]; \ 98 ap[ii+1] = ap[ii]; \ 99 } \ 100 rp[_i] = col1; \ 101 ap[_i] = value; \ 102 a_noinsert: ; \ 103 ailen[row] = nrow; \ 104 } 105 106 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 107 { \ 108 \ 109 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 110 rmax = bimax[row]; nrow = bilen[row]; \ 111 col1 = col - shift; \ 112 \ 113 low = 0; high = nrow; \ 114 while (high-low > 5) { \ 115 t = (low+high)/2; \ 116 if (rp[t] > col) high = t; \ 117 else low = t; \ 118 } \ 119 for ( _i=0; _i<nrow; _i++ ) { \ 120 if (rp[_i] > col1) break; \ 121 if (rp[_i] == col1) { \ 122 if (addv == ADD_VALUES) ap[_i] += value; \ 123 else ap[_i] = value; \ 124 goto b_noinsert; \ 125 } \ 126 } \ 127 if (nonew == 1) goto b_noinsert; \ 128 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 129 if (nrow >= rmax) { \ 130 /* there is no extra room in row, therefore enlarge */ \ 131 int new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \ 132 Scalar *new_a; \ 133 \ 134 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 135 \ 136 /* malloc new storage space */ \ 137 len = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \ 138 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 139 new_j = (int *) (new_a + new_nz); \ 140 new_i = new_j + new_nz; \ 141 \ 142 /* copy over old data into new slots */ \ 143 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \ 144 for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 145 PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \ 146 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 147 PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 148 len*sizeof(int)); \ 149 PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \ 150 PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 151 len*sizeof(Scalar)); \ 152 /* free up old matrix storage */ \ 153 \ 154 PetscFree(b->a); \ 155 if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 156 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 157 b->singlemalloc = 1; \ 158 \ 159 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 160 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 161 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 162 b->maxnz += CHUNKSIZE; \ 163 b->reallocs++; \ 164 } \ 165 N = nrow++ - 1; b->nz++; \ 166 /* shift up all the later entries in this row */ \ 167 for ( ii=N; ii>=_i; ii-- ) { \ 168 rp[ii+1] = rp[ii]; \ 169 ap[ii+1] = ap[ii]; \ 170 } \ 171 rp[_i] = col1; \ 172 ap[_i] = value; \ 173 b_noinsert: ; \ 174 bilen[row] = nrow; \ 175 } 176 177 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 178 #undef __FUNC__ 179 #define __FUNC__ "MatSetValues_MPIAIJ" 180 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 181 { 182 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 183 Scalar value; 184 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 185 int cstart = aij->cstart, cend = aij->cend,row,col; 186 int roworiented = aij->roworiented; 187 188 /* Some Variables required in the macro */ 189 Mat A = aij->A; 190 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 191 int *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j; 192 Scalar *aa = a->a; 193 194 Mat B = aij->B; 195 Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data; 196 int *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j; 197 Scalar *ba = b->a; 198 199 int *rp,ii,nrow,_i,rmax, N, col1,low,high,t; 200 int nonew = a->nonew,shift = a->indexshift; 201 Scalar *ap; 202 203 PetscFunctionBegin; 204 for ( i=0; i<m; i++ ) { 205 #if defined(USE_PETSC_BOPT_g) 206 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 207 if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 208 #endif 209 if (im[i] >= rstart && im[i] < rend) { 210 row = im[i] - rstart; 211 for ( j=0; j<n; j++ ) { 212 if (in[j] >= cstart && in[j] < cend){ 213 col = in[j] - cstart; 214 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 215 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 216 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 217 } 218 #if defined(USE_PETSC_BOPT_g) 219 else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 220 else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");} 221 #endif 222 else { 223 if (mat->was_assembled) { 224 if (!aij->colmap) { 225 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 226 } 227 col = aij->colmap[in[j]] - 1; 228 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 229 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 230 col = in[j]; 231 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 232 B = aij->B; 233 b = (Mat_SeqAIJ *) B->data; 234 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 235 ba = b->a; 236 } 237 } 238 else col = in[j]; 239 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 240 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 241 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 242 } 243 } 244 } 245 else { 246 if (roworiented && !aij->donotstash) { 247 ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 248 } 249 else { 250 if (!aij->donotstash) { 251 row = im[i]; 252 for ( j=0; j<n; j++ ) { 253 ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 254 } 255 } 256 } 257 } 258 } 259 PetscFunctionReturn(0); 260 } 261 262 #undef __FUNC__ 263 #define __FUNC__ "MatGetValues_MPIAIJ" 264 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 265 { 266 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 267 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 268 int cstart = aij->cstart, cend = aij->cend,row,col; 269 270 PetscFunctionBegin; 271 for ( i=0; i<m; i++ ) { 272 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 273 if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 274 if (idxm[i] >= rstart && idxm[i] < rend) { 275 row = idxm[i] - rstart; 276 for ( j=0; j<n; j++ ) { 277 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 278 if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 279 if (idxn[j] >= cstart && idxn[j] < cend){ 280 col = idxn[j] - cstart; 281 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 282 } 283 else { 284 if (!aij->colmap) { 285 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 286 } 287 col = aij->colmap[idxn[j]] - 1; 288 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 289 else { 290 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 291 } 292 } 293 } 294 } else { 295 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 296 } 297 } 298 PetscFunctionReturn(0); 299 } 300 301 #undef __FUNC__ 302 #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 303 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 304 { 305 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 306 MPI_Comm comm = mat->comm; 307 int size = aij->size, *owners = aij->rowners; 308 int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 309 MPI_Request *send_waits,*recv_waits; 310 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 311 InsertMode addv; 312 Scalar *rvalues,*svalues; 313 314 PetscFunctionBegin; 315 /* make sure all processors are either in INSERTMODE or ADDMODE */ 316 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 317 if (addv == (ADD_VALUES|INSERT_VALUES)) { 318 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 319 } 320 mat->insertmode = addv; /* in case this processor had no cache */ 321 322 /* first count number of contributors to each processor */ 323 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 324 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 325 owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 326 for ( i=0; i<aij->stash.n; i++ ) { 327 idx = aij->stash.idx[i]; 328 for ( j=0; j<size; j++ ) { 329 if (idx >= owners[j] && idx < owners[j+1]) { 330 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 331 } 332 } 333 } 334 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 335 336 /* inform other processors of number of messages and max length*/ 337 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 338 ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 339 nreceives = work[rank]; 340 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 341 nmax = work[rank]; 342 PetscFree(work); 343 344 /* post receives: 345 1) each message will consist of ordered pairs 346 (global index,value) we store the global index as a double 347 to simplify the message passing. 348 2) since we don't know how long each individual message is we 349 allocate the largest needed buffer for each receive. Potentially 350 this is a lot of wasted space. 351 352 353 This could be done better. 354 */ 355 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues); 356 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 357 for ( i=0; i<nreceives; i++ ) { 358 ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 359 comm,recv_waits+i);CHKERRQ(ierr); 360 } 361 362 /* do sends: 363 1) starts[i] gives the starting index in svalues for stuff going to 364 the ith processor 365 */ 366 svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 367 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 368 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 369 starts[0] = 0; 370 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 371 for ( i=0; i<aij->stash.n; i++ ) { 372 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 373 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 374 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 375 } 376 PetscFree(owner); 377 starts[0] = 0; 378 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 379 count = 0; 380 for ( i=0; i<size; i++ ) { 381 if (procs[i]) { 382 ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 383 } 384 } 385 PetscFree(starts); PetscFree(nprocs); 386 387 /* Free cache space */ 388 PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n); 389 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 390 391 aij->svalues = svalues; aij->rvalues = rvalues; 392 aij->nsends = nsends; aij->nrecvs = nreceives; 393 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 394 aij->rmax = nmax; 395 396 PetscFunctionReturn(0); 397 } 398 extern int MatSetUpMultiply_MPIAIJ(Mat); 399 400 #undef __FUNC__ 401 #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 402 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 403 { 404 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 405 MPI_Status *send_status,recv_status; 406 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 407 int row,col,other_disassembled; 408 Scalar *values,val; 409 InsertMode addv = mat->insertmode; 410 411 PetscFunctionBegin; 412 /* wait on receives */ 413 while (count) { 414 ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 415 /* unpack receives into our local space */ 416 values = aij->rvalues + 3*imdex*aij->rmax; 417 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr); 418 n = n/3; 419 for ( i=0; i<n; i++ ) { 420 row = (int) PetscReal(values[3*i]) - aij->rstart; 421 col = (int) PetscReal(values[3*i+1]); 422 val = values[3*i+2]; 423 if (col >= aij->cstart && col < aij->cend) { 424 col -= aij->cstart; 425 ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 426 } else { 427 if (mat->was_assembled || mat->assembled) { 428 if (!aij->colmap) { 429 ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr); 430 } 431 col = aij->colmap[col] - 1; 432 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 433 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 434 col = (int) PetscReal(values[3*i+1]); 435 } 436 } 437 ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 438 } 439 } 440 count--; 441 } 442 PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 443 444 /* wait on sends */ 445 if (aij->nsends) { 446 send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 447 ierr = MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr); 448 PetscFree(send_status); 449 } 450 PetscFree(aij->send_waits); PetscFree(aij->svalues); 451 452 ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 453 ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 454 455 /* determine if any processor has disassembled, if so we must 456 also disassemble ourselfs, in order that we may reassemble. */ 457 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 458 if (mat->was_assembled && !other_disassembled) { 459 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 460 } 461 462 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 463 ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 464 } 465 ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 466 ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 467 468 if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 469 PetscFunctionReturn(0); 470 } 471 472 #undef __FUNC__ 473 #define __FUNC__ "MatZeroEntries_MPIAIJ" 474 int MatZeroEntries_MPIAIJ(Mat A) 475 { 476 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 477 int ierr; 478 479 PetscFunctionBegin; 480 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 481 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 482 PetscFunctionReturn(0); 483 } 484 485 #undef __FUNC__ 486 #define __FUNC__ "MatZeroRows_MPIAIJ" 487 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 488 { 489 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 490 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 491 int *procs,*nprocs,j,found,idx,nsends,*work,row; 492 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 493 int *rvalues,tag = A->tag,count,base,slen,n,*source; 494 int *lens,imdex,*lrows,*values,rstart=l->rstart; 495 MPI_Comm comm = A->comm; 496 MPI_Request *send_waits,*recv_waits; 497 MPI_Status recv_status,*send_status; 498 IS istmp; 499 500 PetscFunctionBegin; 501 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 502 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 503 504 /* first count number of contributors to each processor */ 505 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 506 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 507 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 508 for ( i=0; i<N; i++ ) { 509 idx = rows[i]; 510 found = 0; 511 for ( j=0; j<size; j++ ) { 512 if (idx >= owners[j] && idx < owners[j+1]) { 513 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 514 } 515 } 516 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 517 } 518 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 519 520 /* inform other processors of number of messages and max length*/ 521 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 522 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 523 nrecvs = work[rank]; 524 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 525 nmax = work[rank]; 526 PetscFree(work); 527 528 /* post receives: */ 529 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 530 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 531 for ( i=0; i<nrecvs; i++ ) { 532 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 533 } 534 535 /* do sends: 536 1) starts[i] gives the starting index in svalues for stuff going to 537 the ith processor 538 */ 539 svalues = (int *) PetscMalloc( (N+1)*sizeof(int) ); CHKPTRQ(svalues); 540 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 541 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 542 starts[0] = 0; 543 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 544 for ( i=0; i<N; i++ ) { 545 svalues[starts[owner[i]]++] = rows[i]; 546 } 547 ISRestoreIndices(is,&rows); 548 549 starts[0] = 0; 550 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 551 count = 0; 552 for ( i=0; i<size; i++ ) { 553 if (procs[i]) { 554 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 555 } 556 } 557 PetscFree(starts); 558 559 base = owners[rank]; 560 561 /* wait on receives */ 562 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 563 source = lens + nrecvs; 564 count = nrecvs; slen = 0; 565 while (count) { 566 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 567 /* unpack receives into our local space */ 568 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 569 source[imdex] = recv_status.MPI_SOURCE; 570 lens[imdex] = n; 571 slen += n; 572 count--; 573 } 574 PetscFree(recv_waits); 575 576 /* move the data into the send scatter */ 577 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 578 count = 0; 579 for ( i=0; i<nrecvs; i++ ) { 580 values = rvalues + i*nmax; 581 for ( j=0; j<lens[i]; j++ ) { 582 lrows[count++] = values[j] - base; 583 } 584 } 585 PetscFree(rvalues); PetscFree(lens); 586 PetscFree(owner); PetscFree(nprocs); 587 588 /* actually zap the local rows */ 589 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 590 PLogObjectParent(A,istmp); 591 592 ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 593 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 594 ierr = ISDestroy(istmp); CHKERRQ(ierr); 595 596 if (diag) { 597 for ( i = 0; i < slen; i++ ) { 598 row = lrows[i] + rstart; 599 MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); 600 } 601 MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); 602 MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); 603 } 604 PetscFree(lrows); 605 /* wait on sends */ 606 if (nsends) { 607 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 608 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 609 PetscFree(send_status); 610 } 611 PetscFree(send_waits); PetscFree(svalues); 612 613 PetscFunctionReturn(0); 614 } 615 616 #undef __FUNC__ 617 #define __FUNC__ "MatMult_MPIAIJ" 618 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 619 { 620 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 621 int ierr,nt; 622 623 PetscFunctionBegin; 624 ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 625 if (nt != a->n) { 626 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 627 } 628 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 629 ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr); 630 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 631 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 632 PetscFunctionReturn(0); 633 } 634 635 #undef __FUNC__ 636 #define __FUNC__ "MatMultAdd_MPIAIJ" 637 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 638 { 639 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 640 int ierr; 641 642 PetscFunctionBegin; 643 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 644 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 645 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 646 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 647 PetscFunctionReturn(0); 648 } 649 650 #undef __FUNC__ 651 #define __FUNC__ "MatMultTrans_MPIAIJ" 652 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 653 { 654 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 655 int ierr; 656 657 PetscFunctionBegin; 658 /* do nondiagonal part */ 659 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 660 /* send it on its way */ 661 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 662 /* do local part */ 663 ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr); 664 /* receive remote parts: note this assumes the values are not actually */ 665 /* inserted in yy until the next line, which is true for my implementation*/ 666 /* but is not perhaps always true. */ 667 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 668 PetscFunctionReturn(0); 669 } 670 671 #undef __FUNC__ 672 #define __FUNC__ "MatMultTransAdd_MPIAIJ" 673 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 674 { 675 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 676 int ierr; 677 678 PetscFunctionBegin; 679 /* do nondiagonal part */ 680 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 681 /* send it on its way */ 682 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 683 /* do local part */ 684 ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 685 /* receive remote parts: note this assumes the values are not actually */ 686 /* inserted in yy until the next line, which is true for my implementation*/ 687 /* but is not perhaps always true. */ 688 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 689 PetscFunctionReturn(0); 690 } 691 692 /* 693 This only works correctly for square matrices where the subblock A->A is the 694 diagonal block 695 */ 696 #undef __FUNC__ 697 #define __FUNC__ "MatGetDiagonal_MPIAIJ" 698 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 699 { 700 int ierr; 701 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 702 703 PetscFunctionBegin; 704 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 705 if (a->rstart != a->cstart || a->rend != a->cend) { 706 SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition"); 707 } 708 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 709 PetscFunctionReturn(0); 710 } 711 712 #undef __FUNC__ 713 #define __FUNC__ "MatScale_MPIAIJ" 714 int MatScale_MPIAIJ(Scalar *aa,Mat A) 715 { 716 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 717 int ierr; 718 719 PetscFunctionBegin; 720 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 721 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 722 PetscFunctionReturn(0); 723 } 724 725 #undef __FUNC__ 726 #define __FUNC__ "MatDestroy_MPIAIJ" 727 int MatDestroy_MPIAIJ(PetscObject obj) 728 { 729 Mat mat = (Mat) obj; 730 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 731 int ierr; 732 733 PetscFunctionBegin; 734 #if defined(USE_PETSC_LOG) 735 PLogObjectState(obj,"Rows=%d, Cols=%d",aij->M,aij->N); 736 #endif 737 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 738 PetscFree(aij->rowners); 739 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 740 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 741 if (aij->colmap) PetscFree(aij->colmap); 742 if (aij->garray) PetscFree(aij->garray); 743 if (aij->lvec) VecDestroy(aij->lvec); 744 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 745 if (aij->rowvalues) PetscFree(aij->rowvalues); 746 PetscFree(aij); 747 PLogObjectDestroy(mat); 748 PetscHeaderDestroy(mat); 749 PetscFunctionReturn(0); 750 } 751 752 #undef __FUNC__ 753 #define __FUNC__ "MatView_MPIAIJ_Binary" 754 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 755 { 756 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 757 int ierr; 758 759 PetscFunctionBegin; 760 if (aij->size == 1) { 761 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 762 } 763 else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 764 PetscFunctionReturn(0); 765 } 766 767 #undef __FUNC__ 768 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" 769 extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 770 { 771 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 772 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 773 int ierr, format,shift = C->indexshift,rank; 774 FILE *fd; 775 ViewerType vtype; 776 777 PetscFunctionBegin; 778 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 779 if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 780 ierr = ViewerGetFormat(viewer,&format); 781 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 782 MatInfo info; 783 int flg; 784 MPI_Comm_rank(mat->comm,&rank); 785 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 786 ierr = MatGetInfo(mat,MAT_LOCAL,&info); 787 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 788 PetscSequentialPhaseBegin(mat->comm,1); 789 if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 790 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 791 else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 792 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 793 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info); 794 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 795 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); 796 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 797 fflush(fd); 798 PetscSequentialPhaseEnd(mat->comm,1); 799 ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 800 PetscFunctionReturn(0); 801 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 802 PetscFunctionReturn(0); 803 } 804 } 805 806 if (vtype == DRAW_VIEWER) { 807 Draw draw; 808 PetscTruth isnull; 809 ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 810 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 811 } 812 813 if (vtype == ASCII_FILE_VIEWER) { 814 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 815 PetscSequentialPhaseBegin(mat->comm,1); 816 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 817 aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 818 aij->cend); 819 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 820 ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 821 fflush(fd); 822 PetscSequentialPhaseEnd(mat->comm,1); 823 } else { 824 int size = aij->size; 825 rank = aij->rank; 826 if (size == 1) { 827 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 828 } else { 829 /* assemble the entire matrix onto first processor. */ 830 Mat A; 831 Mat_SeqAIJ *Aloc; 832 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 833 Scalar *a; 834 835 if (!rank) { 836 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 837 } else { 838 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 839 } 840 PLogObjectParent(mat,A); 841 842 /* copy over the A part */ 843 Aloc = (Mat_SeqAIJ*) aij->A->data; 844 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 845 row = aij->rstart; 846 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 847 for ( i=0; i<m; i++ ) { 848 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 849 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 850 } 851 aj = Aloc->j; 852 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 853 854 /* copy over the B part */ 855 Aloc = (Mat_SeqAIJ*) aij->B->data; 856 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 857 row = aij->rstart; 858 ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 859 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 860 for ( i=0; i<m; i++ ) { 861 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 862 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 863 } 864 PetscFree(ct); 865 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 866 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 867 /* 868 Everyone has to call to draw the matrix since the graphics waits are 869 synchronized across all processors that share the Draw object 870 */ 871 if (!rank || vtype == DRAW_VIEWER) { 872 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 873 } 874 ierr = MatDestroy(A); CHKERRQ(ierr); 875 } 876 } 877 PetscFunctionReturn(0); 878 } 879 880 #undef __FUNC__ 881 #define __FUNC__ "MatView_MPIAIJ" 882 int MatView_MPIAIJ(PetscObject obj,Viewer viewer) 883 { 884 Mat mat = (Mat) obj; 885 int ierr; 886 ViewerType vtype; 887 888 PetscFunctionBegin; 889 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 890 if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 891 vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 892 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 893 } 894 else if (vtype == BINARY_FILE_VIEWER) { 895 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 896 } 897 PetscFunctionReturn(0); 898 } 899 900 /* 901 This has to provide several versions. 902 903 2) a) use only local smoothing updating outer values only once. 904 b) local smoothing updating outer values each inner iteration 905 3) color updating out values betwen colors. 906 */ 907 #undef __FUNC__ 908 #define __FUNC__ "MatRelax_MPIAIJ" 909 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 910 double fshift,int its,Vec xx) 911 { 912 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 913 Mat AA = mat->A, BB = mat->B; 914 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 915 Scalar *b,*x,*xs,*ls,d,*v,sum; 916 int ierr,*idx, *diag; 917 int n = mat->n, m = mat->m, i,shift = A->indexshift; 918 919 PetscFunctionBegin; 920 VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 921 xs = x + shift; /* shift by one for index start of 1 */ 922 ls = ls + shift; 923 if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);} 924 diag = A->diag; 925 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 926 if (flag & SOR_ZERO_INITIAL_GUESS) { 927 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 928 PetscFunctionReturn(0); 929 } 930 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 931 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 932 while (its--) { 933 /* go down through the rows */ 934 for ( i=0; i<m; i++ ) { 935 n = A->i[i+1] - A->i[i]; 936 idx = A->j + A->i[i] + shift; 937 v = A->a + A->i[i] + shift; 938 sum = b[i]; 939 SPARSEDENSEMDOT(sum,xs,v,idx,n); 940 d = fshift + A->a[diag[i]+shift]; 941 n = B->i[i+1] - B->i[i]; 942 idx = B->j + B->i[i] + shift; 943 v = B->a + B->i[i] + shift; 944 SPARSEDENSEMDOT(sum,ls,v,idx,n); 945 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 946 } 947 /* come up through the rows */ 948 for ( i=m-1; i>-1; i-- ) { 949 n = A->i[i+1] - A->i[i]; 950 idx = A->j + A->i[i] + shift; 951 v = A->a + A->i[i] + shift; 952 sum = b[i]; 953 SPARSEDENSEMDOT(sum,xs,v,idx,n); 954 d = fshift + A->a[diag[i]+shift]; 955 n = B->i[i+1] - B->i[i]; 956 idx = B->j + B->i[i] + shift; 957 v = B->a + B->i[i] + shift; 958 SPARSEDENSEMDOT(sum,ls,v,idx,n); 959 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 960 } 961 } 962 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 963 if (flag & SOR_ZERO_INITIAL_GUESS) { 964 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 965 PetscFunctionReturn(0); 966 } 967 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 968 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 969 while (its--) { 970 for ( i=0; i<m; i++ ) { 971 n = A->i[i+1] - A->i[i]; 972 idx = A->j + A->i[i] + shift; 973 v = A->a + A->i[i] + shift; 974 sum = b[i]; 975 SPARSEDENSEMDOT(sum,xs,v,idx,n); 976 d = fshift + A->a[diag[i]+shift]; 977 n = B->i[i+1] - B->i[i]; 978 idx = B->j + B->i[i] + shift; 979 v = B->a + B->i[i] + shift; 980 SPARSEDENSEMDOT(sum,ls,v,idx,n); 981 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 982 } 983 } 984 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 985 if (flag & SOR_ZERO_INITIAL_GUESS) { 986 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 987 PetscFunctionReturn(0); 988 } 989 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 990 mat->Mvctx); CHKERRQ(ierr); 991 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 992 mat->Mvctx); CHKERRQ(ierr); 993 while (its--) { 994 for ( i=m-1; i>-1; i-- ) { 995 n = A->i[i+1] - A->i[i]; 996 idx = A->j + A->i[i] + shift; 997 v = A->a + A->i[i] + shift; 998 sum = b[i]; 999 SPARSEDENSEMDOT(sum,xs,v,idx,n); 1000 d = fshift + A->a[diag[i]+shift]; 1001 n = B->i[i+1] - B->i[i]; 1002 idx = B->j + B->i[i] + shift; 1003 v = B->a + B->i[i] + shift; 1004 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1005 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1006 } 1007 } 1008 } else { 1009 SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported"); 1010 } 1011 PetscFunctionReturn(0); 1012 } 1013 1014 #undef __FUNC__ 1015 #define __FUNC__ "MatGetInfo_MPIAIJ" 1016 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1017 { 1018 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1019 Mat A = mat->A, B = mat->B; 1020 int ierr; 1021 double isend[5], irecv[5]; 1022 1023 PetscFunctionBegin; 1024 info->block_size = 1.0; 1025 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1026 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1027 isend[3] = info->memory; isend[4] = info->mallocs; 1028 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1029 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1030 isend[3] += info->memory; isend[4] += info->mallocs; 1031 if (flag == MAT_LOCAL) { 1032 info->nz_used = isend[0]; 1033 info->nz_allocated = isend[1]; 1034 info->nz_unneeded = isend[2]; 1035 info->memory = isend[3]; 1036 info->mallocs = isend[4]; 1037 } else if (flag == MAT_GLOBAL_MAX) { 1038 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1039 info->nz_used = irecv[0]; 1040 info->nz_allocated = irecv[1]; 1041 info->nz_unneeded = irecv[2]; 1042 info->memory = irecv[3]; 1043 info->mallocs = irecv[4]; 1044 } else if (flag == MAT_GLOBAL_SUM) { 1045 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1046 info->nz_used = irecv[0]; 1047 info->nz_allocated = irecv[1]; 1048 info->nz_unneeded = irecv[2]; 1049 info->memory = irecv[3]; 1050 info->mallocs = irecv[4]; 1051 } 1052 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1053 info->fill_ratio_needed = 0; 1054 info->factor_mallocs = 0; 1055 info->rows_global = (double)mat->M; 1056 info->columns_global = (double)mat->N; 1057 info->rows_local = (double)mat->m; 1058 info->columns_local = (double)mat->N; 1059 1060 PetscFunctionReturn(0); 1061 } 1062 1063 #undef __FUNC__ 1064 #define __FUNC__ "MatSetOption_MPIAIJ" 1065 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1066 { 1067 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1068 1069 PetscFunctionBegin; 1070 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1071 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1072 op == MAT_COLUMNS_UNSORTED || 1073 op == MAT_COLUMNS_SORTED || 1074 op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1075 op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1076 MatSetOption(a->A,op); 1077 MatSetOption(a->B,op); 1078 } else if (op == MAT_ROW_ORIENTED) { 1079 a->roworiented = 1; 1080 MatSetOption(a->A,op); 1081 MatSetOption(a->B,op); 1082 } else if (op == MAT_ROWS_SORTED || 1083 op == MAT_ROWS_UNSORTED || 1084 op == MAT_SYMMETRIC || 1085 op == MAT_STRUCTURALLY_SYMMETRIC || 1086 op == MAT_YES_NEW_DIAGONALS) 1087 PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1088 else if (op == MAT_COLUMN_ORIENTED) { 1089 a->roworiented = 0; 1090 MatSetOption(a->A,op); 1091 MatSetOption(a->B,op); 1092 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1093 a->donotstash = 1; 1094 } else if (op == MAT_NO_NEW_DIAGONALS){ 1095 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1096 } else { 1097 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1098 } 1099 PetscFunctionReturn(0); 1100 } 1101 1102 #undef __FUNC__ 1103 #define __FUNC__ "MatGetSize_MPIAIJ" 1104 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1105 { 1106 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1107 1108 PetscFunctionBegin; 1109 if (m) *m = mat->M; 1110 if (n) *n = mat->N; 1111 PetscFunctionReturn(0); 1112 } 1113 1114 #undef __FUNC__ 1115 #define __FUNC__ "MatGetLocalSize_MPIAIJ" 1116 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1117 { 1118 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1119 1120 PetscFunctionBegin; 1121 if (m) *m = mat->m; 1122 if (n) *n = mat->n; 1123 PetscFunctionReturn(0); 1124 } 1125 1126 #undef __FUNC__ 1127 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" 1128 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1129 { 1130 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1131 1132 PetscFunctionBegin; 1133 *m = mat->rstart; *n = mat->rend; 1134 PetscFunctionReturn(0); 1135 } 1136 1137 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1138 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1139 1140 #undef __FUNC__ 1141 #define __FUNC__ "MatGetRow_MPIAIJ" 1142 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1143 { 1144 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1145 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1146 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1147 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1148 int *cmap, *idx_p; 1149 1150 PetscFunctionBegin; 1151 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1152 mat->getrowactive = PETSC_TRUE; 1153 1154 if (!mat->rowvalues && (idx || v)) { 1155 /* 1156 allocate enough space to hold information from the longest row. 1157 */ 1158 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1159 int max = 1,m = mat->m,tmp; 1160 for ( i=0; i<m; i++ ) { 1161 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1162 if (max < tmp) { max = tmp; } 1163 } 1164 mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar))); 1165 CHKPTRQ(mat->rowvalues); 1166 mat->rowindices = (int *) (mat->rowvalues + max); 1167 } 1168 1169 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows") 1170 lrow = row - rstart; 1171 1172 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1173 if (!v) {pvA = 0; pvB = 0;} 1174 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1175 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1176 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1177 nztot = nzA + nzB; 1178 1179 cmap = mat->garray; 1180 if (v || idx) { 1181 if (nztot) { 1182 /* Sort by increasing column numbers, assuming A and B already sorted */ 1183 int imark = -1; 1184 if (v) { 1185 *v = v_p = mat->rowvalues; 1186 for ( i=0; i<nzB; i++ ) { 1187 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1188 else break; 1189 } 1190 imark = i; 1191 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1192 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1193 } 1194 if (idx) { 1195 *idx = idx_p = mat->rowindices; 1196 if (imark > -1) { 1197 for ( i=0; i<imark; i++ ) { 1198 idx_p[i] = cmap[cworkB[i]]; 1199 } 1200 } else { 1201 for ( i=0; i<nzB; i++ ) { 1202 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1203 else break; 1204 } 1205 imark = i; 1206 } 1207 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 1208 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 1209 } 1210 } 1211 else { 1212 if (idx) *idx = 0; 1213 if (v) *v = 0; 1214 } 1215 } 1216 *nz = nztot; 1217 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1218 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1219 PetscFunctionReturn(0); 1220 } 1221 1222 #undef __FUNC__ 1223 #define __FUNC__ "MatRestoreRow_MPIAIJ" 1224 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1225 { 1226 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1227 1228 PetscFunctionBegin; 1229 if (aij->getrowactive == PETSC_FALSE) { 1230 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1231 } 1232 aij->getrowactive = PETSC_FALSE; 1233 PetscFunctionReturn(0); 1234 } 1235 1236 #undef __FUNC__ 1237 #define __FUNC__ "MatNorm_MPIAIJ" 1238 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1239 { 1240 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1241 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1242 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1243 double sum = 0.0; 1244 Scalar *v; 1245 1246 PetscFunctionBegin; 1247 if (aij->size == 1) { 1248 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1249 } else { 1250 if (type == NORM_FROBENIUS) { 1251 v = amat->a; 1252 for (i=0; i<amat->nz; i++ ) { 1253 #if defined(USE_PETSC_COMPLEX) 1254 sum += real(conj(*v)*(*v)); v++; 1255 #else 1256 sum += (*v)*(*v); v++; 1257 #endif 1258 } 1259 v = bmat->a; 1260 for (i=0; i<bmat->nz; i++ ) { 1261 #if defined(USE_PETSC_COMPLEX) 1262 sum += real(conj(*v)*(*v)); v++; 1263 #else 1264 sum += (*v)*(*v); v++; 1265 #endif 1266 } 1267 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1268 *norm = sqrt(*norm); 1269 } else if (type == NORM_1) { /* max column norm */ 1270 double *tmp, *tmp2; 1271 int *jj, *garray = aij->garray; 1272 tmp = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp); 1273 tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2); 1274 PetscMemzero(tmp,aij->N*sizeof(double)); 1275 *norm = 0.0; 1276 v = amat->a; jj = amat->j; 1277 for ( j=0; j<amat->nz; j++ ) { 1278 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1279 } 1280 v = bmat->a; jj = bmat->j; 1281 for ( j=0; j<bmat->nz; j++ ) { 1282 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1283 } 1284 ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1285 for ( j=0; j<aij->N; j++ ) { 1286 if (tmp2[j] > *norm) *norm = tmp2[j]; 1287 } 1288 PetscFree(tmp); PetscFree(tmp2); 1289 } else if (type == NORM_INFINITY) { /* max row norm */ 1290 double ntemp = 0.0; 1291 for ( j=0; j<amat->m; j++ ) { 1292 v = amat->a + amat->i[j] + shift; 1293 sum = 0.0; 1294 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1295 sum += PetscAbsScalar(*v); v++; 1296 } 1297 v = bmat->a + bmat->i[j] + shift; 1298 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1299 sum += PetscAbsScalar(*v); v++; 1300 } 1301 if (sum > ntemp) ntemp = sum; 1302 } 1303 ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr); 1304 } else { 1305 SETERRQ(PETSC_ERR_SUP,0,"No support for two norm"); 1306 } 1307 } 1308 PetscFunctionReturn(0); 1309 } 1310 1311 #undef __FUNC__ 1312 #define __FUNC__ "MatTranspose_MPIAIJ" 1313 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1314 { 1315 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1316 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1317 int ierr,shift = Aloc->indexshift; 1318 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1319 Mat B; 1320 Scalar *array; 1321 1322 PetscFunctionBegin; 1323 if (matout == PETSC_NULL && M != N) { 1324 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1325 } 1326 1327 ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1328 1329 /* copy over the A part */ 1330 Aloc = (Mat_SeqAIJ*) a->A->data; 1331 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1332 row = a->rstart; 1333 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1334 for ( i=0; i<m; i++ ) { 1335 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1336 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1337 } 1338 aj = Aloc->j; 1339 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1340 1341 /* copy over the B part */ 1342 Aloc = (Mat_SeqAIJ*) a->B->data; 1343 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1344 row = a->rstart; 1345 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1346 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1347 for ( i=0; i<m; i++ ) { 1348 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1349 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1350 } 1351 PetscFree(ct); 1352 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1353 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1354 if (matout != PETSC_NULL) { 1355 *matout = B; 1356 } else { 1357 PetscOps *Abops; 1358 struct _MatOps *Aops; 1359 1360 /* This isn't really an in-place transpose .... but free data structures from a */ 1361 PetscFree(a->rowners); 1362 ierr = MatDestroy(a->A); CHKERRQ(ierr); 1363 ierr = MatDestroy(a->B); CHKERRQ(ierr); 1364 if (a->colmap) PetscFree(a->colmap); 1365 if (a->garray) PetscFree(a->garray); 1366 if (a->lvec) VecDestroy(a->lvec); 1367 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1368 PetscFree(a); 1369 1370 /* 1371 This is horrible, horrible code. We need to keep the 1372 A pointers for the bops and ops but copy everything 1373 else from C. 1374 */ 1375 Abops = A->bops; 1376 Aops = A->ops; 1377 PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1378 A->bops = Abops; 1379 A->ops = Aops; 1380 PetscHeaderDestroy(B); 1381 } 1382 PetscFunctionReturn(0); 1383 } 1384 1385 #undef __FUNC__ 1386 #define __FUNC__ "MatDiagonalScale_MPIAIJ" 1387 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1388 { 1389 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1390 Mat a = aij->A, b = aij->B; 1391 int ierr,s1,s2,s3; 1392 1393 PetscFunctionBegin; 1394 ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr); 1395 if (rr) { 1396 VecGetLocalSize_Fast(rr,s1); 1397 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 1398 /* Overlap communication with computation. */ 1399 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1400 } 1401 if (ll) { 1402 VecGetLocalSize_Fast(ll,s1); 1403 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1404 ierr = (*b->ops->diagonalscale)(b,ll,0); CHKERRQ(ierr); 1405 } 1406 /* scale the diagonal block */ 1407 ierr = (*a->ops->diagonalscale)(a,ll,rr); CHKERRQ(ierr); 1408 1409 if (rr) { 1410 /* Do a scatter end and then right scale the off-diagonal block */ 1411 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1412 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr); 1413 } 1414 1415 PetscFunctionReturn(0); 1416 } 1417 1418 1419 extern int MatPrintHelp_SeqAIJ(Mat); 1420 #undef __FUNC__ 1421 #define __FUNC__ "MatPrintHelp_MPIAIJ" 1422 int MatPrintHelp_MPIAIJ(Mat A) 1423 { 1424 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1425 int ierr; 1426 1427 PetscFunctionBegin; 1428 if (!a->rank) { 1429 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1430 } 1431 PetscFunctionReturn(0); 1432 } 1433 1434 #undef __FUNC__ 1435 #define __FUNC__ "MatGetBlockSize_MPIAIJ" 1436 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1437 { 1438 PetscFunctionBegin; 1439 *bs = 1; 1440 PetscFunctionReturn(0); 1441 } 1442 #undef __FUNC__ 1443 #define __FUNC__ "MatSetUnfactored_MPIAIJ" 1444 int MatSetUnfactored_MPIAIJ(Mat A) 1445 { 1446 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1447 int ierr; 1448 1449 PetscFunctionBegin; 1450 ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1451 PetscFunctionReturn(0); 1452 } 1453 1454 #undef __FUNC__ 1455 #define __FUNC__ "MatEqual_MPIAIJ" 1456 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag) 1457 { 1458 Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data; 1459 Mat a, b, c, d; 1460 PetscTruth flg; 1461 int ierr; 1462 1463 PetscFunctionBegin; 1464 if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1465 a = matA->A; b = matA->B; 1466 c = matB->A; d = matB->B; 1467 1468 ierr = MatEqual(a, c, &flg); CHKERRQ(ierr); 1469 if (flg == PETSC_TRUE) { 1470 ierr = MatEqual(b, d, &flg); CHKERRQ(ierr); 1471 } 1472 ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr); 1473 PetscFunctionReturn(0); 1474 } 1475 1476 extern int MatConvertSameType_MPIAIJ(Mat,Mat *,int); 1477 extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int); 1478 extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring); 1479 extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **); 1480 extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatGetSubMatrixCall,Mat *); 1481 1482 /* -------------------------------------------------------------------*/ 1483 static struct _MatOps MatOps = {MatSetValues_MPIAIJ, 1484 MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ, 1485 MatMult_MPIAIJ,MatMultAdd_MPIAIJ, 1486 MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ, 1487 0,0, 1488 0,0, 1489 0,0, 1490 MatRelax_MPIAIJ, 1491 MatTranspose_MPIAIJ, 1492 MatGetInfo_MPIAIJ,MatEqual_MPIAIJ, 1493 MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ, 1494 MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ, 1495 0, 1496 MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ, 1497 0,0,0,0, 1498 MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ, 1499 0,0, 1500 0,0,MatConvertSameType_MPIAIJ,0,0, 1501 0,0,0, 1502 MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0, 1503 MatPrintHelp_MPIAIJ, 1504 MatScale_MPIAIJ,0,0,0, 1505 MatGetBlockSize_MPIAIJ,0,0,0,0, 1506 MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ, 1507 0,0,MatGetSubMatrix_MPIAIJ}; 1508 1509 1510 #undef __FUNC__ 1511 #define __FUNC__ "MatCreateMPIAIJ" 1512 /*@C 1513 MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format 1514 (the default parallel PETSc format). For good matrix assembly performance 1515 the user should preallocate the matrix storage by setting the parameters 1516 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 1517 performance can be increased by more than a factor of 50. 1518 1519 Input Parameters: 1520 . comm - MPI communicator 1521 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 1522 This value should be the same as the local size used in creating the 1523 y vector for the matrix-vector product y = Ax. 1524 . n - This value should be the same as the local size used in creating the 1525 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 1526 calculated if N is given) For square matrices n is almost always m. 1527 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 1528 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 1529 . d_nz - number of nonzeros per row in diagonal portion of local submatrix 1530 (same for all local rows) 1531 . d_nzz - array containing the number of nonzeros in the various rows of the 1532 diagonal portion of the local submatrix (possibly different for each row) 1533 or PETSC_NULL. You must leave room for the diagonal entry even if 1534 it is zero. 1535 . o_nz - number of nonzeros per row in the off-diagonal portion of local 1536 submatrix (same for all local rows). 1537 . o_nzz - array containing the number of nonzeros in the various rows of the 1538 off-diagonal portion of the local submatrix (possibly different for 1539 each row) or PETSC_NULL. 1540 1541 Output Parameter: 1542 . A - the matrix 1543 1544 Notes: 1545 The AIJ format (also called the Yale sparse matrix format or 1546 compressed row storage), is fully compatible with standard Fortran 77 1547 storage. That is, the stored row and column indices can begin at 1548 either one (as in Fortran) or zero. See the users manual for details. 1549 1550 The user MUST specify either the local or global matrix dimensions 1551 (possibly both). 1552 1553 By default, this format uses inodes (identical nodes) when possible. 1554 We search for consecutive rows with the same nonzero structure, thereby 1555 reusing matrix information to achieve increased efficiency. 1556 1557 Options Database Keys: 1558 $ -mat_aij_no_inode - Do not use inodes 1559 $ -mat_aij_inode_limit <limit> - Set inode limit. 1560 $ (max limit=5) 1561 $ -mat_aij_oneindex - Internally use indexing starting at 1 1562 $ rather than 0. Note: When calling MatSetValues(), 1563 $ the user still MUST index entries starting at 0! 1564 1565 Storage Information: 1566 For a square global matrix we define each processor's diagonal portion 1567 to be its local rows and the corresponding columns (a square submatrix); 1568 each processor's off-diagonal portion encompasses the remainder of the 1569 local matrix (a rectangular submatrix). 1570 1571 The user can specify preallocated storage for the diagonal part of 1572 the local submatrix with either d_nz or d_nnz (not both). Set 1573 d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 1574 memory allocation. Likewise, specify preallocated storage for the 1575 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 1576 1577 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 1578 the figure below we depict these three local rows and all columns (0-11). 1579 1580 $ 0 1 2 3 4 5 6 7 8 9 10 11 1581 $ ------------------- 1582 $ row 3 | o o o d d d o o o o o o 1583 $ row 4 | o o o d d d o o o o o o 1584 $ row 5 | o o o d d d o o o o o o 1585 $ ------------------- 1586 $ 1587 1588 Thus, any entries in the d locations are stored in the d (diagonal) 1589 submatrix, and any entries in the o locations are stored in the 1590 o (off-diagonal) submatrix. Note that the d and the o submatrices are 1591 stored simply in the MATSEQAIJ format for compressed row storage. 1592 1593 Now d_nz should indicate the number of nonzeros per row in the d matrix, 1594 and o_nz should indicate the number of nonzeros per row in the o matrix. 1595 In general, for PDE problems in which most nonzeros are near the diagonal, 1596 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 1597 or you will get TERRIBLE performance; see the users' manual chapter on 1598 matrices. 1599 1600 .keywords: matrix, aij, compressed row, sparse, parallel 1601 1602 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1603 @*/ 1604 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N, 1605 int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 1606 { 1607 Mat B; 1608 Mat_MPIAIJ *b; 1609 int ierr, i,sum[2],work[2],size; 1610 1611 PetscFunctionBegin; 1612 MPI_Comm_size(comm,&size); 1613 if (size == 1) { 1614 if (M == PETSC_DECIDE) M = m; 1615 if (N == PETSC_DECIDE) N = n; 1616 ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr); 1617 PetscFunctionReturn(0); 1618 } 1619 1620 *A = 0; 1621 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,comm,MatDestroy,MatView); 1622 PLogObjectCreate(B); 1623 B->data = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b); 1624 PetscMemzero(b,sizeof(Mat_MPIAIJ)); 1625 PetscMemcpy(B->ops,&MatOps,sizeof(struct _MatOps)); 1626 B->destroy = MatDestroy_MPIAIJ; 1627 B->view = MatView_MPIAIJ; 1628 B->factor = 0; 1629 B->assembled = PETSC_FALSE; 1630 B->mapping = 0; 1631 1632 B->insertmode = NOT_SET_VALUES; 1633 b->size = size; 1634 MPI_Comm_rank(comm,&b->rank); 1635 1636 if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) { 1637 SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 1638 } 1639 1640 if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 1641 work[0] = m; work[1] = n; 1642 ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr); 1643 if (M == PETSC_DECIDE) M = sum[0]; 1644 if (N == PETSC_DECIDE) N = sum[1]; 1645 } 1646 if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);} 1647 if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);} 1648 b->m = m; B->m = m; 1649 b->n = n; B->n = n; 1650 b->N = N; B->N = N; 1651 b->M = M; B->M = M; 1652 1653 /* build local table of row and column ownerships */ 1654 b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 1655 PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1656 b->cowners = b->rowners + b->size + 2; 1657 ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1658 b->rowners[0] = 0; 1659 for ( i=2; i<=b->size; i++ ) { 1660 b->rowners[i] += b->rowners[i-1]; 1661 } 1662 b->rstart = b->rowners[b->rank]; 1663 b->rend = b->rowners[b->rank+1]; 1664 ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1665 b->cowners[0] = 0; 1666 for ( i=2; i<=b->size; i++ ) { 1667 b->cowners[i] += b->cowners[i-1]; 1668 } 1669 b->cstart = b->cowners[b->rank]; 1670 b->cend = b->cowners[b->rank+1]; 1671 1672 if (d_nz == PETSC_DEFAULT) d_nz = 5; 1673 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 1674 PLogObjectParent(B,b->A); 1675 if (o_nz == PETSC_DEFAULT) o_nz = 0; 1676 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 1677 PLogObjectParent(B,b->B); 1678 1679 /* build cache for off array entries formed */ 1680 ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 1681 b->donotstash = 0; 1682 b->colmap = 0; 1683 b->garray = 0; 1684 b->roworiented = 1; 1685 1686 /* stuff used for matrix vector multiply */ 1687 b->lvec = 0; 1688 b->Mvctx = 0; 1689 1690 /* stuff for MatGetRow() */ 1691 b->rowindices = 0; 1692 b->rowvalues = 0; 1693 b->getrowactive = PETSC_FALSE; 1694 1695 *A = B; 1696 PetscFunctionReturn(0); 1697 } 1698 1699 #undef __FUNC__ 1700 #define __FUNC__ "MatConvertSameType_MPIAIJ" 1701 int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues) 1702 { 1703 Mat mat; 1704 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data; 1705 int ierr, len=0, flg; 1706 1707 PetscFunctionBegin; 1708 *newmat = 0; 1709 PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,matin->comm,MatDestroy,MatView); 1710 PLogObjectCreate(mat); 1711 mat->data = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a); 1712 PetscMemcpy(mat->ops,&MatOps,sizeof(struct _MatOps)); 1713 mat->destroy = MatDestroy_MPIAIJ; 1714 mat->view = MatView_MPIAIJ; 1715 mat->factor = matin->factor; 1716 mat->assembled = PETSC_TRUE; 1717 1718 a->m = mat->m = oldmat->m; 1719 a->n = mat->n = oldmat->n; 1720 a->M = mat->M = oldmat->M; 1721 a->N = mat->N = oldmat->N; 1722 1723 a->rstart = oldmat->rstart; 1724 a->rend = oldmat->rend; 1725 a->cstart = oldmat->cstart; 1726 a->cend = oldmat->cend; 1727 a->size = oldmat->size; 1728 a->rank = oldmat->rank; 1729 mat->insertmode = NOT_SET_VALUES; 1730 a->rowvalues = 0; 1731 a->getrowactive = PETSC_FALSE; 1732 1733 a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners); 1734 PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ)); 1735 a->cowners = a->rowners + a->size + 2; 1736 PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int)); 1737 ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr); 1738 if (oldmat->colmap) { 1739 a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap); 1740 PLogObjectMemory(mat,(a->N)*sizeof(int)); 1741 PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int)); 1742 } else a->colmap = 0; 1743 if (oldmat->garray) { 1744 len = ((Mat_SeqAIJ *) (oldmat->B->data))->n; 1745 a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray); 1746 PLogObjectMemory(mat,len*sizeof(int)); 1747 if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); 1748 } else a->garray = 0; 1749 1750 ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr); 1751 PLogObjectParent(mat,a->lvec); 1752 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr); 1753 PLogObjectParent(mat,a->Mvctx); 1754 ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr); 1755 PLogObjectParent(mat,a->A); 1756 ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr); 1757 PLogObjectParent(mat,a->B); 1758 ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr); 1759 if (flg) { 1760 ierr = MatPrintHelp(mat); CHKERRQ(ierr); 1761 } 1762 *newmat = mat; 1763 PetscFunctionReturn(0); 1764 } 1765 1766 #include "sys.h" 1767 1768 #undef __FUNC__ 1769 #define __FUNC__ "MatLoad_MPIAIJ" 1770 int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat) 1771 { 1772 Mat A; 1773 Scalar *vals,*svals; 1774 MPI_Comm comm = ((PetscObject)viewer)->comm; 1775 MPI_Status status; 1776 int i, nz, ierr, j,rstart, rend, fd; 1777 int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols; 1778 int *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols; 1779 int tag = ((PetscObject)viewer)->tag; 1780 1781 PetscFunctionBegin; 1782 MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank); 1783 if (!rank) { 1784 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1785 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr); 1786 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 1787 if (header[3] < 0) { 1788 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ"); 1789 } 1790 } 1791 1792 1793 ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 1794 M = header[1]; N = header[2]; 1795 /* determine ownership of all rows */ 1796 m = M/size + ((M % size) > rank); 1797 rowners = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners); 1798 ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 1799 rowners[0] = 0; 1800 for ( i=2; i<=size; i++ ) { 1801 rowners[i] += rowners[i-1]; 1802 } 1803 rstart = rowners[rank]; 1804 rend = rowners[rank+1]; 1805 1806 /* distribute row lengths to all processors */ 1807 ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens); 1808 offlens = ourlens + (rend-rstart); 1809 if (!rank) { 1810 rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths); 1811 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 1812 sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts); 1813 for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i]; 1814 ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr); 1815 PetscFree(sndcounts); 1816 } else { 1817 ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr); 1818 } 1819 1820 if (!rank) { 1821 /* calculate the number of nonzeros on each processor */ 1822 procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz); 1823 PetscMemzero(procsnz,size*sizeof(int)); 1824 for ( i=0; i<size; i++ ) { 1825 for ( j=rowners[i]; j< rowners[i+1]; j++ ) { 1826 procsnz[i] += rowlengths[j]; 1827 } 1828 } 1829 PetscFree(rowlengths); 1830 1831 /* determine max buffer needed and allocate it */ 1832 maxnz = 0; 1833 for ( i=0; i<size; i++ ) { 1834 maxnz = PetscMax(maxnz,procsnz[i]); 1835 } 1836 cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols); 1837 1838 /* read in my part of the matrix column indices */ 1839 nz = procsnz[0]; 1840 mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1841 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr); 1842 1843 /* read in every one elses and ship off */ 1844 for ( i=1; i<size; i++ ) { 1845 nz = procsnz[i]; 1846 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr); 1847 ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 1848 } 1849 PetscFree(cols); 1850 } else { 1851 /* determine buffer space needed for message */ 1852 nz = 0; 1853 for ( i=0; i<m; i++ ) { 1854 nz += ourlens[i]; 1855 } 1856 mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols); 1857 1858 /* receive message of column indices*/ 1859 ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 1860 ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 1861 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 1862 } 1863 1864 /* loop over local rows, determining number of off diagonal entries */ 1865 PetscMemzero(offlens,m*sizeof(int)); 1866 jj = 0; 1867 for ( i=0; i<m; i++ ) { 1868 for ( j=0; j<ourlens[i]; j++ ) { 1869 if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++; 1870 jj++; 1871 } 1872 } 1873 1874 /* create our matrix */ 1875 for ( i=0; i<m; i++ ) { 1876 ourlens[i] -= offlens[i]; 1877 } 1878 ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr); 1879 A = *newmat; 1880 MatSetOption(A,MAT_COLUMNS_SORTED); 1881 for ( i=0; i<m; i++ ) { 1882 ourlens[i] += offlens[i]; 1883 } 1884 1885 if (!rank) { 1886 vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals); 1887 1888 /* read in my part of the matrix numerical values */ 1889 nz = procsnz[0]; 1890 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 1891 1892 /* insert into matrix */ 1893 jj = rstart; 1894 smycols = mycols; 1895 svals = vals; 1896 for ( i=0; i<m; i++ ) { 1897 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1898 smycols += ourlens[i]; 1899 svals += ourlens[i]; 1900 jj++; 1901 } 1902 1903 /* read in other processors and ship out */ 1904 for ( i=1; i<size; i++ ) { 1905 nz = procsnz[i]; 1906 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr); 1907 ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 1908 } 1909 PetscFree(procsnz); 1910 } else { 1911 /* receive numeric values */ 1912 vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals); 1913 1914 /* receive message of values*/ 1915 ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 1916 ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 1917 if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 1918 1919 /* insert into matrix */ 1920 jj = rstart; 1921 smycols = mycols; 1922 svals = vals; 1923 for ( i=0; i<m; i++ ) { 1924 ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 1925 smycols += ourlens[i]; 1926 svals += ourlens[i]; 1927 jj++; 1928 } 1929 } 1930 PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners); 1931 1932 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1933 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1934 PetscFunctionReturn(0); 1935 } 1936 1937 #undef __FUNC__ 1938 #define __FUNC__ "MatGetSubMatrix_MPIAIJ" 1939 /* 1940 Not great since it makes two copies of the submatrix, first an SeqAIJ 1941 in local and then by concatenating the local matrices the end result. 1942 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 1943 */ 1944 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatGetSubMatrixCall call,Mat *newmat) 1945 { 1946 int ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j; 1947 Mat *local,M; 1948 Scalar *vwork,*aa; 1949 MPI_Comm comm = mat->comm; 1950 Mat_SeqAIJ *aij; 1951 int *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend; 1952 1953 PetscFunctionBegin; 1954 MPI_Comm_rank(comm,&rank); 1955 MPI_Comm_size(comm,&size); 1956 1957 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 1958 1959 /* 1960 m - number of local rows 1961 n - number of columns (same on all processors) 1962 rstart - first row in new global matrix generated 1963 */ 1964 ierr = MatGetSize(*local,&m,&n);CHKERRQ(ierr); 1965 if (call == MAT_INITIAL_MATRIX) { 1966 aij = (Mat_SeqAIJ *) (*local)->data; 1967 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 1968 ii = aij->i; 1969 jj = aij->j; 1970 1971 /* 1972 Determine the number of non-zeros in the diagonal and off-diagonal 1973 portions of the matrix in order to do correct preallocation 1974 */ 1975 1976 /* first get start and end of "diagonal" columns */ 1977 if (csize == PETSC_DECIDE) { 1978 nlocal = n/size + ((n % size) > rank); 1979 } else { 1980 nlocal = csize; 1981 } 1982 ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 1983 rstart = rend - nlocal; 1984 if (rank == size - 1 && rend != n) { 1985 SETERRQ(1,1,"Local column sizes do not add up to total number of columns"); 1986 } 1987 1988 /* next, compute all the lengths */ 1989 dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens); 1990 olens = dlens + m; 1991 for ( i=0; i<m; i++ ) { 1992 jend = ii[i+1] - ii[i]; 1993 olen = 0; 1994 dlen = 0; 1995 for ( j=0; j<jend; j++ ) { 1996 if ( *jj < rstart || *jj >= rend) olen++; 1997 else dlen++; 1998 jj++; 1999 } 2000 olens[i] = olen; 2001 dlens[i] = dlen; 2002 } 2003 ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr); 2004 PetscFree(dlens); 2005 } else { 2006 int ml,nl; 2007 2008 M = *newmat; 2009 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2010 if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request"); 2011 ierr = MatZeroEntries(M);CHKERRQ(ierr); 2012 } 2013 ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr); 2014 aij = (Mat_SeqAIJ *) (*local)->data; 2015 if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix"); 2016 ii = aij->i; 2017 jj = aij->j; 2018 aa = aij->a; 2019 for (i=0; i<m; i++) { 2020 row = rstart + i; 2021 nz = ii[i+1] - ii[i]; 2022 cwork = jj; jj += nz; 2023 vwork = aa; aa += nz; 2024 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr); 2025 } 2026 2027 ierr = MatDestroyMatrices(1,&local);CHKERRQ(ierr); 2028 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2029 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2030 *newmat = M; 2031 PetscFunctionReturn(0); 2032 } 2033