1 2 #ifdef PETSC_RCS_HEADER 3 static char vcid[] = "$Id: mpiaij.c,v 1.238 1998/04/15 19:37:32 curfman Exp curfman $"; 4 #endif 5 6 #include "pinclude/pviewer.h" 7 #include "src/mat/impls/aij/mpi/mpiaij.h" 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 11 /* local utility routine that creates a mapping from the global column 12 number to the local number in the off-diagonal part of the local 13 storage of the matrix. This is done in a non scable way since the 14 length of colmap equals the global matrix length. 15 */ 16 #undef __FUNC__ 17 #define __FUNC__ "CreateColmap_MPIAIJ_Private" 18 int CreateColmap_MPIAIJ_Private(Mat mat) 19 { 20 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 21 Mat_SeqAIJ *B = (Mat_SeqAIJ*) aij->B->data; 22 int n = B->n,i; 23 24 PetscFunctionBegin; 25 aij->colmap = (int *) PetscMalloc((aij->N+1)*sizeof(int));CHKPTRQ(aij->colmap); 26 PLogObjectMemory(mat,aij->N*sizeof(int)); 27 PetscMemzero(aij->colmap,aij->N*sizeof(int)); 28 for ( i=0; i<n; i++ ) aij->colmap[aij->garray[i]] = i+1; 29 PetscFunctionReturn(0); 30 } 31 32 extern int DisAssemble_MPIAIJ(Mat); 33 34 #define CHUNKSIZE 15 35 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 36 { \ 37 \ 38 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 39 rmax = aimax[row]; nrow = ailen[row]; \ 40 col1 = col - shift; \ 41 \ 42 low = 0; high = nrow; \ 43 while (high-low > 5) { \ 44 t = (low+high)/2; \ 45 if (rp[t] > col) high = t; \ 46 else low = t; \ 47 } \ 48 for ( _i=0; _i<nrow; _i++ ) { \ 49 if (rp[_i] > col1) break; \ 50 if (rp[_i] == col1) { \ 51 if (addv == ADD_VALUES) ap[_i] += value; \ 52 else ap[_i] = value; \ 53 goto a_noinsert; \ 54 } \ 55 } \ 56 if (nonew == 1) goto a_noinsert; \ 57 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 58 if (nrow >= rmax) { \ 59 /* there is no extra room in row, therefore enlarge */ \ 60 int new_nz = ai[a->m] + CHUNKSIZE,len,*new_i,*new_j; \ 61 Scalar *new_a; \ 62 \ 63 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 64 \ 65 /* malloc new storage space */ \ 66 len = new_nz*(sizeof(int)+sizeof(Scalar))+(a->m+1)*sizeof(int); \ 67 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 68 new_j = (int *) (new_a + new_nz); \ 69 new_i = new_j + new_nz; \ 70 \ 71 /* copy over old data into new slots */ \ 72 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} \ 73 for ( ii=row+1; ii<a->m+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 74 PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int)); \ 75 len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \ 76 PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \ 77 len*sizeof(int)); \ 78 PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(Scalar)); \ 79 PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \ 80 len*sizeof(Scalar)); \ 81 /* free up old matrix storage */ \ 82 \ 83 PetscFree(a->a); \ 84 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} \ 85 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 86 a->singlemalloc = 1; \ 87 \ 88 rp = aj + ai[row] + shift; ap = aa + ai[row] + shift; \ 89 rmax = aimax[row] = aimax[row] + CHUNKSIZE; \ 90 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 91 a->maxnz += CHUNKSIZE; \ 92 a->reallocs++; \ 93 } \ 94 N = nrow++ - 1; a->nz++; \ 95 /* shift up all the later entries in this row */ \ 96 for ( ii=N; ii>=_i; ii-- ) { \ 97 rp[ii+1] = rp[ii]; \ 98 ap[ii+1] = ap[ii]; \ 99 } \ 100 rp[_i] = col1; \ 101 ap[_i] = value; \ 102 
a_noinsert: ; \ 103 ailen[row] = nrow; \ 104 } 105 106 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 107 { \ 108 \ 109 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 110 rmax = bimax[row]; nrow = bilen[row]; \ 111 col1 = col - shift; \ 112 \ 113 low = 0; high = nrow; \ 114 while (high-low > 5) { \ 115 t = (low+high)/2; \ 116 if (rp[t] > col) high = t; \ 117 else low = t; \ 118 } \ 119 for ( _i=0; _i<nrow; _i++ ) { \ 120 if (rp[_i] > col1) break; \ 121 if (rp[_i] == col1) { \ 122 if (addv == ADD_VALUES) ap[_i] += value; \ 123 else ap[_i] = value; \ 124 goto b_noinsert; \ 125 } \ 126 } \ 127 if (nonew == 1) goto b_noinsert; \ 128 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 129 if (nrow >= rmax) { \ 130 /* there is no extra room in row, therefore enlarge */ \ 131 int new_nz = bi[b->m] + CHUNKSIZE,len,*new_i,*new_j; \ 132 Scalar *new_a; \ 133 \ 134 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 135 \ 136 /* malloc new storage space */ \ 137 len = new_nz*(sizeof(int)+sizeof(Scalar))+(b->m+1)*sizeof(int); \ 138 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); \ 139 new_j = (int *) (new_a + new_nz); \ 140 new_i = new_j + new_nz; \ 141 \ 142 /* copy over old data into new slots */ \ 143 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = bi[ii];} \ 144 for ( ii=row+1; ii<b->m+1; ii++ ) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 145 PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int)); \ 146 len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \ 147 PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \ 148 len*sizeof(int)); \ 149 PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(Scalar)); \ 150 PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \ 151 len*sizeof(Scalar)); \ 152 /* free up old matrix storage */ \ 153 \ 154 PetscFree(b->a); \ 155 if (!b->singlemalloc) {PetscFree(b->i);PetscFree(b->j);} \ 156 ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 157 b->singlemalloc = 1; \ 158 \ 159 rp = bj + bi[row] + shift; ap = ba + bi[row] + shift; \ 160 rmax = bimax[row] = bimax[row] + CHUNKSIZE; \ 161 PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(Scalar))); \ 162 b->maxnz += CHUNKSIZE; \ 163 b->reallocs++; \ 164 } \ 165 N = nrow++ - 1; b->nz++; \ 166 /* shift up all the later entries in this row */ \ 167 for ( ii=N; ii>=_i; ii-- ) { \ 168 rp[ii+1] = rp[ii]; \ 169 ap[ii+1] = ap[ii]; \ 170 } \ 171 rp[_i] = col1; \ 172 ap[_i] = value; \ 173 b_noinsert: ; \ 174 bilen[row] = nrow; \ 175 } 176 177 extern int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 178 #undef __FUNC__ 179 #define __FUNC__ "MatSetValues_MPIAIJ" 180 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 181 { 182 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 183 Scalar value; 184 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 185 int cstart = aij->cstart, cend = aij->cend,row,col; 186 int roworiented = aij->roworiented; 187 188 /* Some Variables required in the macro */ 189 Mat A = aij->A; 190 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 191 int *aimax = a->imax, *ai = a->i, *ailen = a->ilen,*aj = a->j; 192 Scalar *aa = a->a; 193 194 Mat B = aij->B; 195 Mat_SeqAIJ *b = (Mat_SeqAIJ *) B->data; 196 int *bimax = b->imax, *bi = b->i, *bilen = b->ilen,*bj = b->j; 197 Scalar *ba = b->a; 198 199 int *rp,ii,nrow,_i,rmax, N, col1,low,high,t; 200 int nonew = a->nonew,shift = a->indexshift; 201 Scalar *ap; 202 203 PetscFunctionBegin; 204 
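  /*
     Summary of the logic below: locally owned rows (rstart <= im[i] < rend) are
     split by column -- columns in [cstart,cend) go into the diagonal block aij->A,
     all other columns go into the off-diagonal block aij->B (translating the global
     column number through colmap/garray, and disassembling the matrix first if a
     new off-diagonal column shows up after an earlier assembly).  Rows owned by
     other processors are stashed, unless donotstash is set, and are communicated
     during MatAssemblyBegin/End.
  */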
for ( i=0; i<m; i++ ) { 205 #if defined(USE_PETSC_BOPT_g) 206 if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 207 if (im[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 208 #endif 209 if (im[i] >= rstart && im[i] < rend) { 210 row = im[i] - rstart; 211 for ( j=0; j<n; j++ ) { 212 if (in[j] >= cstart && in[j] < cend){ 213 col = in[j] - cstart; 214 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 215 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 216 /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 217 } 218 #if defined(USE_PETSC_BOPT_g) 219 else if (in[j] < 0) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");} 220 else if (in[j] >= aij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");} 221 #endif 222 else { 223 if (mat->was_assembled) { 224 if (!aij->colmap) { 225 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 226 } 227 col = aij->colmap[in[j]] - 1; 228 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 229 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 230 col = in[j]; 231 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 232 B = aij->B; 233 b = (Mat_SeqAIJ *) B->data; 234 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 235 ba = b->a; 236 } 237 } else col = in[j]; 238 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 239 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 240 /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 241 } 242 } 243 } 244 else { 245 if (roworiented && !aij->donotstash) { 246 ierr = StashValues_Private(&aij->stash,im[i],n,in,v+i*n,addv);CHKERRQ(ierr); 247 } 248 else { 249 if (!aij->donotstash) { 250 row = im[i]; 251 for ( j=0; j<n; j++ ) { 252 ierr = StashValues_Private(&aij->stash,row,1,in+j,v+i+j*m,addv);CHKERRQ(ierr); 253 } 254 } 255 } 256 } 257 } 258 PetscFunctionReturn(0); 259 } 260 261 #undef __FUNC__ 262 #define __FUNC__ "MatGetValues_MPIAIJ" 263 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 264 { 265 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 266 int ierr,i,j, rstart = aij->rstart, rend = aij->rend; 267 int cstart = aij->cstart, cend = aij->cend,row,col; 268 269 PetscFunctionBegin; 270 for ( i=0; i<m; i++ ) { 271 if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 272 if (idxm[i] >= aij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 273 if (idxm[i] >= rstart && idxm[i] < rend) { 274 row = idxm[i] - rstart; 275 for ( j=0; j<n; j++ ) { 276 if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 277 if (idxn[j] >= aij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 278 if (idxn[j] >= cstart && idxn[j] < cend){ 279 col = idxn[j] - cstart; 280 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 281 } 282 else { 283 if (!aij->colmap) { 284 ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 285 } 286 col = aij->colmap[idxn[j]] - 1; 287 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 288 else { 289 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j); CHKERRQ(ierr); 290 } 291 } 292 } 293 } else { 294 SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 295 } 296 } 297 PetscFunctionReturn(0); 298 } 299 300 #undef __FUNC__ 301 #define __FUNC__ "MatAssemblyBegin_MPIAIJ" 302 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 303 { 304 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 305 MPI_Comm comm = mat->comm; 306 int size = aij->size, *owners = 
aij->rowners; 307 int rank = aij->rank,tag = mat->tag, *owner,*starts,count,ierr; 308 MPI_Request *send_waits,*recv_waits; 309 int *nprocs,i,j,idx,*procs,nsends,nreceives,nmax,*work; 310 InsertMode addv; 311 Scalar *rvalues,*svalues; 312 313 PetscFunctionBegin; 314 /* make sure all processors are either in INSERTMODE or ADDMODE */ 315 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr); 316 if (addv == (ADD_VALUES|INSERT_VALUES)) { 317 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 318 } 319 mat->insertmode = addv; /* in case this processor had no cache */ 320 321 /* first count number of contributors to each processor */ 322 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 323 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 324 owner = (int *) PetscMalloc( (aij->stash.n+1)*sizeof(int) ); CHKPTRQ(owner); 325 for ( i=0; i<aij->stash.n; i++ ) { 326 idx = aij->stash.idx[i]; 327 for ( j=0; j<size; j++ ) { 328 if (idx >= owners[j] && idx < owners[j+1]) { 329 nprocs[j]++; procs[j] = 1; owner[i] = j; break; 330 } 331 } 332 } 333 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 334 335 /* inform other processors of number of messages and max length*/ 336 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 337 ierr = MPI_Allreduce(procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 338 nreceives = work[rank]; 339 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 340 nmax = work[rank]; 341 PetscFree(work); 342 343 /* post receives: 344 1) each message will consist of ordered pairs 345 (global index,value) we store the global index as a double 346 to simplify the message passing. 347 2) since we don't know how long each individual message is we 348 allocate the largest needed buffer for each receive. Potentially 349 this is a lot of wasted space. 350 351 352 This could be done better. 
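       Each stashed entry is in fact sent as a triple (global row index, global
       column index, value), with the indices stored as Scalars so that a single
       MPIU_SCALAR message carries them; this is why the buffer sizes and message
       lengths below carry a factor of 3.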
353 */ 354 rvalues = (Scalar *) PetscMalloc(3*(nreceives+1)*(nmax+1)*sizeof(Scalar));CHKPTRQ(rvalues); 355 recv_waits = (MPI_Request *) PetscMalloc((nreceives+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 356 for ( i=0; i<nreceives; i++ ) { 357 ierr = MPI_Irecv(rvalues+3*nmax*i,3*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag, 358 comm,recv_waits+i);CHKERRQ(ierr); 359 } 360 361 /* do sends: 362 1) starts[i] gives the starting index in svalues for stuff going to 363 the ith processor 364 */ 365 svalues = (Scalar *) PetscMalloc(3*(aij->stash.n+1)*sizeof(Scalar));CHKPTRQ(svalues); 366 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 367 starts = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(starts); 368 starts[0] = 0; 369 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 370 for ( i=0; i<aij->stash.n; i++ ) { 371 svalues[3*starts[owner[i]]] = (Scalar) aij->stash.idx[i]; 372 svalues[3*starts[owner[i]]+1] = (Scalar) aij->stash.idy[i]; 373 svalues[3*(starts[owner[i]]++)+2] = aij->stash.array[i]; 374 } 375 PetscFree(owner); 376 starts[0] = 0; 377 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 378 count = 0; 379 for ( i=0; i<size; i++ ) { 380 if (procs[i]) { 381 ierr = MPI_Isend(svalues+3*starts[i],3*nprocs[i],MPIU_SCALAR,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 382 } 383 } 384 PetscFree(starts); PetscFree(nprocs); 385 386 /* Free cache space */ 387 PLogInfo(mat,"MatAssemblyBegin_MPIAIJ:Number of off-processor values %d\n",aij->stash.n); 388 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 389 390 aij->svalues = svalues; aij->rvalues = rvalues; 391 aij->nsends = nsends; aij->nrecvs = nreceives; 392 aij->send_waits = send_waits; aij->recv_waits = recv_waits; 393 aij->rmax = nmax; 394 395 PetscFunctionReturn(0); 396 } 397 extern int MatSetUpMultiply_MPIAIJ(Mat); 398 399 #undef __FUNC__ 400 #define __FUNC__ "MatAssemblyEnd_MPIAIJ" 401 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 402 { 403 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 404 MPI_Status *send_status,recv_status; 405 int imdex,nrecvs = aij->nrecvs, count = nrecvs, i, n, ierr; 406 int row,col,other_disassembled; 407 Scalar *values,val; 408 InsertMode addv = mat->insertmode; 409 410 PetscFunctionBegin; 411 /* wait on receives */ 412 while (count) { 413 ierr = MPI_Waitany(nrecvs,aij->recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 414 /* unpack receives into our local space */ 415 values = aij->rvalues + 3*imdex*aij->rmax; 416 ierr = MPI_Get_count(&recv_status,MPIU_SCALAR,&n);CHKERRQ(ierr); 417 n = n/3; 418 for ( i=0; i<n; i++ ) { 419 row = (int) PetscReal(values[3*i]) - aij->rstart; 420 col = (int) PetscReal(values[3*i+1]); 421 val = values[3*i+2]; 422 if (col >= aij->cstart && col < aij->cend) { 423 col -= aij->cstart; 424 ierr = MatSetValues(aij->A,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 425 } else { 426 if (mat->was_assembled || mat->assembled) { 427 if (!aij->colmap) { 428 ierr = CreateColmap_MPIAIJ_Private(mat); CHKERRQ(ierr); 429 } 430 col = aij->colmap[col] - 1; 431 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 432 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 433 col = (int) PetscReal(values[3*i+1]); 434 } 435 } 436 ierr = MatSetValues(aij->B,1,&row,1,&col,&val,addv); CHKERRQ(ierr); 437 } 438 } 439 count--; 440 } 441 PetscFree(aij->recv_waits); PetscFree(aij->rvalues); 442 443 /* wait on sends */ 444 if (aij->nsends) { 445 send_status = (MPI_Status *) PetscMalloc(aij->nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 446 ierr = 
MPI_Waitall(aij->nsends,aij->send_waits,send_status);CHKERRQ(ierr); 447 PetscFree(send_status); 448 } 449 PetscFree(aij->send_waits); PetscFree(aij->svalues); 450 451 ierr = MatAssemblyBegin(aij->A,mode); CHKERRQ(ierr); 452 ierr = MatAssemblyEnd(aij->A,mode); CHKERRQ(ierr); 453 454 /* determine if any processor has disassembled, if so we must 455 also disassemble ourselfs, in order that we may reassemble. */ 456 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 457 if (mat->was_assembled && !other_disassembled) { 458 ierr = DisAssemble_MPIAIJ(mat); CHKERRQ(ierr); 459 } 460 461 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 462 ierr = MatSetUpMultiply_MPIAIJ(mat); CHKERRQ(ierr); 463 } 464 ierr = MatAssemblyBegin(aij->B,mode); CHKERRQ(ierr); 465 ierr = MatAssemblyEnd(aij->B,mode); CHKERRQ(ierr); 466 467 if (aij->rowvalues) {PetscFree(aij->rowvalues); aij->rowvalues = 0;} 468 PetscFunctionReturn(0); 469 } 470 471 #undef __FUNC__ 472 #define __FUNC__ "MatZeroEntries_MPIAIJ" 473 int MatZeroEntries_MPIAIJ(Mat A) 474 { 475 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 476 int ierr; 477 478 PetscFunctionBegin; 479 ierr = MatZeroEntries(l->A); CHKERRQ(ierr); 480 ierr = MatZeroEntries(l->B); CHKERRQ(ierr); 481 PetscFunctionReturn(0); 482 } 483 484 #undef __FUNC__ 485 #define __FUNC__ "MatZeroRows_MPIAIJ" 486 int MatZeroRows_MPIAIJ(Mat A,IS is,Scalar *diag) 487 { 488 Mat_MPIAIJ *l = (Mat_MPIAIJ *) A->data; 489 int i,ierr,N, *rows,*owners = l->rowners,size = l->size; 490 int *procs,*nprocs,j,found,idx,nsends,*work,row; 491 int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 492 int *rvalues,tag = A->tag,count,base,slen,n,*source; 493 int *lens,imdex,*lrows,*values,rstart=l->rstart; 494 MPI_Comm comm = A->comm; 495 MPI_Request *send_waits,*recv_waits; 496 MPI_Status recv_status,*send_status; 497 IS istmp; 498 499 PetscFunctionBegin; 500 ierr = ISGetSize(is,&N); CHKERRQ(ierr); 501 ierr = ISGetIndices(is,&rows); CHKERRQ(ierr); 502 503 /* first count number of contributors to each processor */ 504 nprocs = (int *) PetscMalloc( 2*size*sizeof(int) ); CHKPTRQ(nprocs); 505 PetscMemzero(nprocs,2*size*sizeof(int)); procs = nprocs + size; 506 owner = (int *) PetscMalloc((N+1)*sizeof(int)); CHKPTRQ(owner); /* see note*/ 507 for ( i=0; i<N; i++ ) { 508 idx = rows[i]; 509 found = 0; 510 for ( j=0; j<size; j++ ) { 511 if (idx >= owners[j] && idx < owners[j+1]) { 512 nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 513 } 514 } 515 if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 516 } 517 nsends = 0; for ( i=0; i<size; i++ ) { nsends += procs[i];} 518 519 /* inform other processors of number of messages and max length*/ 520 work = (int *) PetscMalloc( size*sizeof(int) ); CHKPTRQ(work); 521 ierr = MPI_Allreduce( procs, work,size,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 522 nrecvs = work[rank]; 523 ierr = MPI_Allreduce( nprocs, work,size,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr); 524 nmax = work[rank]; 525 PetscFree(work); 526 527 /* post receives: */ 528 rvalues = (int *) PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 529 recv_waits = (MPI_Request *) PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 530 for ( i=0; i<nrecvs; i++ ) { 531 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 532 } 533 534 /* do sends: 535 1) starts[i] gives the starting index in svalues for stuff going to 536 the ith processor 537 */ 538 svalues = (int *) PetscMalloc( 
(N+1)*sizeof(int) ); CHKPTRQ(svalues); 539 send_waits = (MPI_Request *) PetscMalloc( (nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 540 starts = (int *) PetscMalloc( (size+1)*sizeof(int) ); CHKPTRQ(starts); 541 starts[0] = 0; 542 for ( i=1; i<size; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 543 for ( i=0; i<N; i++ ) { 544 svalues[starts[owner[i]]++] = rows[i]; 545 } 546 ISRestoreIndices(is,&rows); 547 548 starts[0] = 0; 549 for ( i=1; i<size+1; i++ ) { starts[i] = starts[i-1] + nprocs[i-1];} 550 count = 0; 551 for ( i=0; i<size; i++ ) { 552 if (procs[i]) { 553 ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 554 } 555 } 556 PetscFree(starts); 557 558 base = owners[rank]; 559 560 /* wait on receives */ 561 lens = (int *) PetscMalloc( 2*(nrecvs+1)*sizeof(int) ); CHKPTRQ(lens); 562 source = lens + nrecvs; 563 count = nrecvs; slen = 0; 564 while (count) { 565 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 566 /* unpack receives into our local space */ 567 ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 568 source[imdex] = recv_status.MPI_SOURCE; 569 lens[imdex] = n; 570 slen += n; 571 count--; 572 } 573 PetscFree(recv_waits); 574 575 /* move the data into the send scatter */ 576 lrows = (int *) PetscMalloc( (slen+1)*sizeof(int) ); CHKPTRQ(lrows); 577 count = 0; 578 for ( i=0; i<nrecvs; i++ ) { 579 values = rvalues + i*nmax; 580 for ( j=0; j<lens[i]; j++ ) { 581 lrows[count++] = values[j] - base; 582 } 583 } 584 PetscFree(rvalues); PetscFree(lens); 585 PetscFree(owner); PetscFree(nprocs); 586 587 /* actually zap the local rows */ 588 ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 589 PLogObjectParent(A,istmp); 590 591 ierr = MatZeroRows(l->A,istmp,0); CHKERRQ(ierr); 592 ierr = MatZeroRows(l->B,istmp,0); CHKERRQ(ierr); 593 ierr = ISDestroy(istmp); CHKERRQ(ierr); 594 595 if (diag) { 596 for ( i = 0; i < slen; i++ ) { 597 row = lrows[i] + rstart; 598 MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES); 599 } 600 MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); 601 MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); 602 } 603 PetscFree(lrows); 604 /* wait on sends */ 605 if (nsends) { 606 send_status = (MPI_Status *) PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 607 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 608 PetscFree(send_status); 609 } 610 PetscFree(send_waits); PetscFree(svalues); 611 612 PetscFunctionReturn(0); 613 } 614 615 #undef __FUNC__ 616 #define __FUNC__ "MatMult_MPIAIJ" 617 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 618 { 619 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 620 int ierr,nt; 621 622 PetscFunctionBegin; 623 ierr = VecGetLocalSize(xx,&nt); CHKERRQ(ierr); 624 if (nt != a->n) { 625 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 626 } 627 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 628 ierr = (*a->A->ops->mult)(a->A,xx,yy); CHKERRQ(ierr); 629 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx); CHKERRQ(ierr); 630 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy); CHKERRQ(ierr); 631 PetscFunctionReturn(0); 632 } 633 634 #undef __FUNC__ 635 #define __FUNC__ "MatMultAdd_MPIAIJ" 636 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 637 { 638 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 639 int ierr; 640 641 PetscFunctionBegin; 642 ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 643 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz); 
CHKERRQ(ierr); 644 ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr); 645 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz); CHKERRQ(ierr); 646 PetscFunctionReturn(0); 647 } 648 649 #undef __FUNC__ 650 #define __FUNC__ "MatMultTrans_MPIAIJ" 651 int MatMultTrans_MPIAIJ(Mat A,Vec xx,Vec yy) 652 { 653 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 654 int ierr; 655 656 PetscFunctionBegin; 657 /* do nondiagonal part */ 658 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 659 /* send it on its way */ 660 ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 661 /* do local part */ 662 ierr = (*a->A->ops->multtrans)(a->A,xx,yy); CHKERRQ(ierr); 663 /* receive remote parts: note this assumes the values are not actually */ 664 /* inserted in yy until the next line, which is true for my implementation*/ 665 /* but is not perhaps always true. */ 666 ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 667 PetscFunctionReturn(0); 668 } 669 670 #undef __FUNC__ 671 #define __FUNC__ "MatMultTransAdd_MPIAIJ" 672 int MatMultTransAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 673 { 674 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 675 int ierr; 676 677 PetscFunctionBegin; 678 /* do nondiagonal part */ 679 ierr = (*a->B->ops->multtrans)(a->B,xx,a->lvec); CHKERRQ(ierr); 680 /* send it on its way */ 681 ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 682 /* do local part */ 683 ierr = (*a->A->ops->multtransadd)(a->A,xx,yy,zz); CHKERRQ(ierr); 684 /* receive remote parts: note this assumes the values are not actually */ 685 /* inserted in yy until the next line, which is true for my implementation*/ 686 /* but is not perhaps always true. */ 687 ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx); CHKERRQ(ierr); 688 PetscFunctionReturn(0); 689 } 690 691 /* 692 This only works correctly for square matrices where the subblock A->A is the 693 diagonal block 694 */ 695 #undef __FUNC__ 696 #define __FUNC__ "MatGetDiagonal_MPIAIJ" 697 int MatGetDiagonal_MPIAIJ(Mat A,Vec v) 698 { 699 int ierr; 700 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 701 702 PetscFunctionBegin; 703 if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); 704 if (a->rstart != a->cstart || a->rend != a->cend) { 705 SETERRQ(PETSC_ERR_ARG_SIZ,0,"row partition must equal col partition"); 706 } 707 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 708 PetscFunctionReturn(0); 709 } 710 711 #undef __FUNC__ 712 #define __FUNC__ "MatScale_MPIAIJ" 713 int MatScale_MPIAIJ(Scalar *aa,Mat A) 714 { 715 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 716 int ierr; 717 718 PetscFunctionBegin; 719 ierr = MatScale(aa,a->A); CHKERRQ(ierr); 720 ierr = MatScale(aa,a->B); CHKERRQ(ierr); 721 PetscFunctionReturn(0); 722 } 723 724 #undef __FUNC__ 725 #define __FUNC__ "MatDestroy_MPIAIJ" 726 int MatDestroy_MPIAIJ(Mat mat) 727 { 728 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 729 int ierr; 730 731 PetscFunctionBegin; 732 #if defined(USE_PETSC_LOG) 733 PLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",aij->M,aij->N); 734 #endif 735 ierr = StashDestroy_Private(&aij->stash); CHKERRQ(ierr); 736 PetscFree(aij->rowners); 737 ierr = MatDestroy(aij->A); CHKERRQ(ierr); 738 ierr = MatDestroy(aij->B); CHKERRQ(ierr); 739 if (aij->colmap) PetscFree(aij->colmap); 740 if (aij->garray) PetscFree(aij->garray); 741 if (aij->lvec) VecDestroy(aij->lvec); 742 if (aij->Mvctx) VecScatterDestroy(aij->Mvctx); 743 if 
(aij->rowvalues) PetscFree(aij->rowvalues); 744 PetscFree(aij); 745 PLogObjectDestroy(mat); 746 PetscHeaderDestroy(mat); 747 PetscFunctionReturn(0); 748 } 749 750 #undef __FUNC__ 751 #define __FUNC__ "MatView_MPIAIJ_Binary" 752 extern int MatView_MPIAIJ_Binary(Mat mat,Viewer viewer) 753 { 754 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 755 int ierr; 756 757 PetscFunctionBegin; 758 if (aij->size == 1) { 759 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 760 } 761 else SETERRQ(PETSC_ERR_SUP,0,"Only uniprocessor output supported"); 762 PetscFunctionReturn(0); 763 } 764 765 #undef __FUNC__ 766 #define __FUNC__ "MatView_MPIAIJ_ASCIIorDraworMatlab" 767 extern int MatView_MPIAIJ_ASCIIorDraworMatlab(Mat mat,Viewer viewer) 768 { 769 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 770 Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data; 771 int ierr, format,shift = C->indexshift,rank; 772 FILE *fd; 773 ViewerType vtype; 774 775 PetscFunctionBegin; 776 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 777 if (vtype == ASCII_FILES_VIEWER || vtype == ASCII_FILE_VIEWER) { 778 ierr = ViewerGetFormat(viewer,&format); 779 if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 780 MatInfo info; 781 int flg; 782 MPI_Comm_rank(mat->comm,&rank); 783 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 784 ierr = MatGetInfo(mat,MAT_LOCAL,&info); 785 ierr = OptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg); CHKERRQ(ierr); 786 PetscSequentialPhaseBegin(mat->comm,1); 787 if (flg) fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n", 788 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 789 else fprintf(fd,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n", 790 rank,aij->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory); 791 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info); 792 fprintf(fd,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used); 793 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info); 794 fprintf(fd,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used); 795 fflush(fd); 796 PetscSequentialPhaseEnd(mat->comm,1); 797 ierr = VecScatterView(aij->Mvctx,viewer); CHKERRQ(ierr); 798 PetscFunctionReturn(0); 799 } else if (format == VIEWER_FORMAT_ASCII_INFO) { 800 PetscFunctionReturn(0); 801 } 802 } 803 804 if (vtype == DRAW_VIEWER) { 805 Draw draw; 806 PetscTruth isnull; 807 ierr = ViewerDrawGetDraw(viewer,&draw); CHKERRQ(ierr); 808 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 809 } 810 811 if (vtype == ASCII_FILE_VIEWER) { 812 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 813 PetscSequentialPhaseBegin(mat->comm,1); 814 fprintf(fd,"[%d] rows %d starts %d ends %d cols %d starts %d ends %d\n", 815 aij->rank,aij->m,aij->rstart,aij->rend,aij->n,aij->cstart, 816 aij->cend); 817 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 818 ierr = MatView(aij->B,viewer); CHKERRQ(ierr); 819 fflush(fd); 820 PetscSequentialPhaseEnd(mat->comm,1); 821 } else { 822 int size = aij->size; 823 rank = aij->rank; 824 if (size == 1) { 825 ierr = MatView(aij->A,viewer); CHKERRQ(ierr); 826 } else { 827 /* assemble the entire matrix onto first processor. 
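         Rank 0 allocates the full M by N matrix while all other ranks contribute
         zero local rows, so this path is intended for viewing modest-size
         matrices, not for production-size problems.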
*/ 828 Mat A; 829 Mat_SeqAIJ *Aloc; 830 int M = aij->M, N = aij->N,m,*ai,*aj,row,*cols,i,*ct; 831 Scalar *a; 832 833 if (!rank) { 834 ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 835 } else { 836 ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 837 } 838 PLogObjectParent(mat,A); 839 840 /* copy over the A part */ 841 Aloc = (Mat_SeqAIJ*) aij->A->data; 842 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 843 row = aij->rstart; 844 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += aij->cstart + shift;} 845 for ( i=0; i<m; i++ ) { 846 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 847 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 848 } 849 aj = Aloc->j; 850 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= aij->cstart + shift;} 851 852 /* copy over the B part */ 853 Aloc = (Mat_SeqAIJ*) aij->B->data; 854 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 855 row = aij->rstart; 856 ct = cols = (int *) PetscMalloc( (ai[m]+1)*sizeof(int) ); CHKPTRQ(cols); 857 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = aij->garray[aj[i]+shift];} 858 for ( i=0; i<m; i++ ) { 859 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 860 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 861 } 862 PetscFree(ct); 863 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 864 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 865 /* 866 Everyone has to call to draw the matrix since the graphics waits are 867 synchronized across all processors that share the Draw object 868 */ 869 if (!rank || vtype == DRAW_VIEWER) { 870 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,viewer); CHKERRQ(ierr); 871 } 872 ierr = MatDestroy(A); CHKERRQ(ierr); 873 } 874 } 875 PetscFunctionReturn(0); 876 } 877 878 #undef __FUNC__ 879 #define __FUNC__ "MatView_MPIAIJ" 880 int MatView_MPIAIJ(Mat mat,Viewer viewer) 881 { 882 int ierr; 883 ViewerType vtype; 884 885 PetscFunctionBegin; 886 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 887 if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER || 888 vtype == DRAW_VIEWER || vtype == MATLAB_VIEWER) { 889 ierr = MatView_MPIAIJ_ASCIIorDraworMatlab(mat,viewer); CHKERRQ(ierr); 890 } else if (vtype == BINARY_FILE_VIEWER) { 891 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 892 } else { 893 SETERRQ(1,1,"Viewer type not supported by PETSc object"); 894 } 895 PetscFunctionReturn(0); 896 } 897 898 /* 899 This has to provide several versions. 900 901 2) a) use only local smoothing updating outer values only once. 902 b) local smoothing updating outer values each inner iteration 903 3) color updating out values betwen colors. 
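     Currently only the purely local sweeps below are implemented
     (SOR_LOCAL_SYMMETRIC/FORWARD/BACKWARD_SWEEP); a true parallel SOR
     returns an unsupported-operation error.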
904 */ 905 #undef __FUNC__ 906 #define __FUNC__ "MatRelax_MPIAIJ" 907 int MatRelax_MPIAIJ(Mat matin,Vec bb,double omega,MatSORType flag, 908 double fshift,int its,Vec xx) 909 { 910 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 911 Mat AA = mat->A, BB = mat->B; 912 Mat_SeqAIJ *A = (Mat_SeqAIJ *) AA->data, *B = (Mat_SeqAIJ *)BB->data; 913 Scalar *b,*x,*xs,*ls,d,*v,sum; 914 int ierr,*idx, *diag; 915 int n = mat->n, m = mat->m, i,shift = A->indexshift; 916 917 PetscFunctionBegin; 918 VecGetArray(xx,&x); VecGetArray(bb,&b); VecGetArray(mat->lvec,&ls); 919 xs = x + shift; /* shift by one for index start of 1 */ 920 ls = ls + shift; 921 if (!A->diag) {ierr = MatMarkDiag_SeqAIJ(AA); CHKERRQ(ierr);} 922 diag = A->diag; 923 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 924 if (flag & SOR_ZERO_INITIAL_GUESS) { 925 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 926 PetscFunctionReturn(0); 927 } 928 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 929 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 930 while (its--) { 931 /* go down through the rows */ 932 for ( i=0; i<m; i++ ) { 933 n = A->i[i+1] - A->i[i]; 934 idx = A->j + A->i[i] + shift; 935 v = A->a + A->i[i] + shift; 936 sum = b[i]; 937 SPARSEDENSEMDOT(sum,xs,v,idx,n); 938 d = fshift + A->a[diag[i]+shift]; 939 n = B->i[i+1] - B->i[i]; 940 idx = B->j + B->i[i] + shift; 941 v = B->a + B->i[i] + shift; 942 SPARSEDENSEMDOT(sum,ls,v,idx,n); 943 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 944 } 945 /* come up through the rows */ 946 for ( i=m-1; i>-1; i-- ) { 947 n = A->i[i+1] - A->i[i]; 948 idx = A->j + A->i[i] + shift; 949 v = A->a + A->i[i] + shift; 950 sum = b[i]; 951 SPARSEDENSEMDOT(sum,xs,v,idx,n); 952 d = fshift + A->a[diag[i]+shift]; 953 n = B->i[i+1] - B->i[i]; 954 idx = B->j + B->i[i] + shift; 955 v = B->a + B->i[i] + shift; 956 SPARSEDENSEMDOT(sum,ls,v,idx,n); 957 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 958 } 959 } 960 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 961 if (flag & SOR_ZERO_INITIAL_GUESS) { 962 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 963 PetscFunctionReturn(0); 964 } 965 ierr=VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 966 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr); 967 while (its--) { 968 for ( i=0; i<m; i++ ) { 969 n = A->i[i+1] - A->i[i]; 970 idx = A->j + A->i[i] + shift; 971 v = A->a + A->i[i] + shift; 972 sum = b[i]; 973 SPARSEDENSEMDOT(sum,xs,v,idx,n); 974 d = fshift + A->a[diag[i]+shift]; 975 n = B->i[i+1] - B->i[i]; 976 idx = B->j + B->i[i] + shift; 977 v = B->a + B->i[i] + shift; 978 SPARSEDENSEMDOT(sum,ls,v,idx,n); 979 x[i] = (1. 
- omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 980 } 981 } 982 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 983 if (flag & SOR_ZERO_INITIAL_GUESS) { 984 ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,its,xx);CHKERRQ(ierr); 985 PetscFunctionReturn(0); 986 } 987 ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 988 mat->Mvctx); CHKERRQ(ierr); 989 ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD, 990 mat->Mvctx); CHKERRQ(ierr); 991 while (its--) { 992 for ( i=m-1; i>-1; i-- ) { 993 n = A->i[i+1] - A->i[i]; 994 idx = A->j + A->i[i] + shift; 995 v = A->a + A->i[i] + shift; 996 sum = b[i]; 997 SPARSEDENSEMDOT(sum,xs,v,idx,n); 998 d = fshift + A->a[diag[i]+shift]; 999 n = B->i[i+1] - B->i[i]; 1000 idx = B->j + B->i[i] + shift; 1001 v = B->a + B->i[i] + shift; 1002 SPARSEDENSEMDOT(sum,ls,v,idx,n); 1003 x[i] = (1. - omega)*x[i] + omega*(sum + A->a[diag[i]+shift]*x[i])/d; 1004 } 1005 } 1006 } else { 1007 SETERRQ(PETSC_ERR_SUP,0,"Parallel SOR not supported"); 1008 } 1009 PetscFunctionReturn(0); 1010 } 1011 1012 #undef __FUNC__ 1013 #define __FUNC__ "MatGetInfo_MPIAIJ" 1014 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1015 { 1016 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1017 Mat A = mat->A, B = mat->B; 1018 int ierr; 1019 double isend[5], irecv[5]; 1020 1021 PetscFunctionBegin; 1022 info->block_size = 1.0; 1023 ierr = MatGetInfo(A,MAT_LOCAL,info); CHKERRQ(ierr); 1024 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1025 isend[3] = info->memory; isend[4] = info->mallocs; 1026 ierr = MatGetInfo(B,MAT_LOCAL,info); CHKERRQ(ierr); 1027 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1028 isend[3] += info->memory; isend[4] += info->mallocs; 1029 if (flag == MAT_LOCAL) { 1030 info->nz_used = isend[0]; 1031 info->nz_allocated = isend[1]; 1032 info->nz_unneeded = isend[2]; 1033 info->memory = isend[3]; 1034 info->mallocs = isend[4]; 1035 } else if (flag == MAT_GLOBAL_MAX) { 1036 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1037 info->nz_used = irecv[0]; 1038 info->nz_allocated = irecv[1]; 1039 info->nz_unneeded = irecv[2]; 1040 info->memory = irecv[3]; 1041 info->mallocs = irecv[4]; 1042 } else if (flag == MAT_GLOBAL_SUM) { 1043 ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1044 info->nz_used = irecv[0]; 1045 info->nz_allocated = irecv[1]; 1046 info->nz_unneeded = irecv[2]; 1047 info->memory = irecv[3]; 1048 info->mallocs = irecv[4]; 1049 } 1050 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1051 info->fill_ratio_needed = 0; 1052 info->factor_mallocs = 0; 1053 info->rows_global = (double)mat->M; 1054 info->columns_global = (double)mat->N; 1055 info->rows_local = (double)mat->m; 1056 info->columns_local = (double)mat->N; 1057 1058 PetscFunctionReturn(0); 1059 } 1060 1061 #undef __FUNC__ 1062 #define __FUNC__ "MatSetOption_MPIAIJ" 1063 int MatSetOption_MPIAIJ(Mat A,MatOption op) 1064 { 1065 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1066 1067 PetscFunctionBegin; 1068 if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1069 op == MAT_YES_NEW_NONZERO_LOCATIONS || 1070 op == MAT_COLUMNS_UNSORTED || 1071 op == MAT_COLUMNS_SORTED || 1072 op == MAT_NEW_NONZERO_ALLOCATION_ERROR || 1073 op == MAT_NEW_NONZERO_LOCATION_ERROR) { 1074 MatSetOption(a->A,op); 1075 MatSetOption(a->B,op); 1076 } else if (op == MAT_ROW_ORIENTED) { 1077 a->roworiented = 1; 1078 MatSetOption(a->A,op); 1079 
MatSetOption(a->B,op); 1080 } else if (op == MAT_ROWS_SORTED || 1081 op == MAT_ROWS_UNSORTED || 1082 op == MAT_SYMMETRIC || 1083 op == MAT_STRUCTURALLY_SYMMETRIC || 1084 op == MAT_YES_NEW_DIAGONALS) 1085 PLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n"); 1086 else if (op == MAT_COLUMN_ORIENTED) { 1087 a->roworiented = 0; 1088 MatSetOption(a->A,op); 1089 MatSetOption(a->B,op); 1090 } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1091 a->donotstash = 1; 1092 } else if (op == MAT_NO_NEW_DIAGONALS){ 1093 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1094 } else { 1095 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1096 } 1097 PetscFunctionReturn(0); 1098 } 1099 1100 #undef __FUNC__ 1101 #define __FUNC__ "MatGetSize_MPIAIJ" 1102 int MatGetSize_MPIAIJ(Mat matin,int *m,int *n) 1103 { 1104 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1105 1106 PetscFunctionBegin; 1107 if (m) *m = mat->M; 1108 if (n) *n = mat->N; 1109 PetscFunctionReturn(0); 1110 } 1111 1112 #undef __FUNC__ 1113 #define __FUNC__ "MatGetLocalSize_MPIAIJ" 1114 int MatGetLocalSize_MPIAIJ(Mat matin,int *m,int *n) 1115 { 1116 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1117 1118 PetscFunctionBegin; 1119 if (m) *m = mat->m; 1120 if (n) *n = mat->n; 1121 PetscFunctionReturn(0); 1122 } 1123 1124 #undef __FUNC__ 1125 #define __FUNC__ "MatGetOwnershipRange_MPIAIJ" 1126 int MatGetOwnershipRange_MPIAIJ(Mat matin,int *m,int *n) 1127 { 1128 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1129 1130 PetscFunctionBegin; 1131 *m = mat->rstart; *n = mat->rend; 1132 PetscFunctionReturn(0); 1133 } 1134 1135 extern int MatGetRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1136 extern int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,Scalar**); 1137 1138 #undef __FUNC__ 1139 #define __FUNC__ "MatGetRow_MPIAIJ" 1140 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1141 { 1142 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) matin->data; 1143 Scalar *vworkA, *vworkB, **pvA, **pvB,*v_p; 1144 int i, ierr, *cworkA, *cworkB, **pcA, **pcB, cstart = mat->cstart; 1145 int nztot, nzA, nzB, lrow, rstart = mat->rstart, rend = mat->rend; 1146 int *cmap, *idx_p; 1147 1148 PetscFunctionBegin; 1149 if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1150 mat->getrowactive = PETSC_TRUE; 1151 1152 if (!mat->rowvalues && (idx || v)) { 1153 /* 1154 allocate enough space to hold information from the longest row. 
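        (that is, the maximum over the local rows of the number of nonzeros in the
        diagonal part A plus the number of nonzeros in the off-diagonal part B)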
1155 */ 1156 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *) mat->A->data,*Ba = (Mat_SeqAIJ *) mat->B->data; 1157 int max = 1,m = mat->m,tmp; 1158 for ( i=0; i<m; i++ ) { 1159 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1160 if (max < tmp) { max = tmp; } 1161 } 1162 mat->rowvalues = (Scalar *) PetscMalloc( max*(sizeof(int)+sizeof(Scalar))); 1163 CHKPTRQ(mat->rowvalues); 1164 mat->rowindices = (int *) (mat->rowvalues + max); 1165 } 1166 1167 if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Only local rows") 1168 lrow = row - rstart; 1169 1170 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1171 if (!v) {pvA = 0; pvB = 0;} 1172 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1173 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1174 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1175 nztot = nzA + nzB; 1176 1177 cmap = mat->garray; 1178 if (v || idx) { 1179 if (nztot) { 1180 /* Sort by increasing column numbers, assuming A and B already sorted */ 1181 int imark = -1; 1182 if (v) { 1183 *v = v_p = mat->rowvalues; 1184 for ( i=0; i<nzB; i++ ) { 1185 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1186 else break; 1187 } 1188 imark = i; 1189 for ( i=0; i<nzA; i++ ) v_p[imark+i] = vworkA[i]; 1190 for ( i=imark; i<nzB; i++ ) v_p[nzA+i] = vworkB[i]; 1191 } 1192 if (idx) { 1193 *idx = idx_p = mat->rowindices; 1194 if (imark > -1) { 1195 for ( i=0; i<imark; i++ ) { 1196 idx_p[i] = cmap[cworkB[i]]; 1197 } 1198 } else { 1199 for ( i=0; i<nzB; i++ ) { 1200 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1201 else break; 1202 } 1203 imark = i; 1204 } 1205 for ( i=0; i<nzA; i++ ) idx_p[imark+i] = cstart + cworkA[i]; 1206 for ( i=imark; i<nzB; i++ ) idx_p[nzA+i] = cmap[cworkB[i]]; 1207 } 1208 } 1209 else { 1210 if (idx) *idx = 0; 1211 if (v) *v = 0; 1212 } 1213 } 1214 *nz = nztot; 1215 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA); CHKERRQ(ierr); 1216 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB); CHKERRQ(ierr); 1217 PetscFunctionReturn(0); 1218 } 1219 1220 #undef __FUNC__ 1221 #define __FUNC__ "MatRestoreRow_MPIAIJ" 1222 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1223 { 1224 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1225 1226 PetscFunctionBegin; 1227 if (aij->getrowactive == PETSC_FALSE) { 1228 SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1229 } 1230 aij->getrowactive = PETSC_FALSE; 1231 PetscFunctionReturn(0); 1232 } 1233 1234 #undef __FUNC__ 1235 #define __FUNC__ "MatNorm_MPIAIJ" 1236 int MatNorm_MPIAIJ(Mat mat,NormType type,double *norm) 1237 { 1238 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1239 Mat_SeqAIJ *amat = (Mat_SeqAIJ*) aij->A->data, *bmat = (Mat_SeqAIJ*) aij->B->data; 1240 int ierr, i, j, cstart = aij->cstart,shift = amat->indexshift; 1241 double sum = 0.0; 1242 Scalar *v; 1243 1244 PetscFunctionBegin; 1245 if (aij->size == 1) { 1246 ierr = MatNorm(aij->A,type,norm); CHKERRQ(ierr); 1247 } else { 1248 if (type == NORM_FROBENIUS) { 1249 v = amat->a; 1250 for (i=0; i<amat->nz; i++ ) { 1251 #if defined(USE_PETSC_COMPLEX) 1252 sum += real(conj(*v)*(*v)); v++; 1253 #else 1254 sum += (*v)*(*v); v++; 1255 #endif 1256 } 1257 v = bmat->a; 1258 for (i=0; i<bmat->nz; i++ ) { 1259 #if defined(USE_PETSC_COMPLEX) 1260 sum += real(conj(*v)*(*v)); v++; 1261 #else 1262 sum += (*v)*(*v); v++; 1263 #endif 1264 } 1265 ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1266 *norm = sqrt(*norm); 1267 } else if (type == NORM_1) { /* max 
column norm */ 1268 double *tmp, *tmp2; 1269 int *jj, *garray = aij->garray; 1270 tmp = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp); 1271 tmp2 = (double *) PetscMalloc( (aij->N+1)*sizeof(double) ); CHKPTRQ(tmp2); 1272 PetscMemzero(tmp,aij->N*sizeof(double)); 1273 *norm = 0.0; 1274 v = amat->a; jj = amat->j; 1275 for ( j=0; j<amat->nz; j++ ) { 1276 tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++; 1277 } 1278 v = bmat->a; jj = bmat->j; 1279 for ( j=0; j<bmat->nz; j++ ) { 1280 tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++; 1281 } 1282 ierr = MPI_Allreduce(tmp,tmp2,aij->N,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 1283 for ( j=0; j<aij->N; j++ ) { 1284 if (tmp2[j] > *norm) *norm = tmp2[j]; 1285 } 1286 PetscFree(tmp); PetscFree(tmp2); 1287 } else if (type == NORM_INFINITY) { /* max row norm */ 1288 double ntemp = 0.0; 1289 for ( j=0; j<amat->m; j++ ) { 1290 v = amat->a + amat->i[j] + shift; 1291 sum = 0.0; 1292 for ( i=0; i<amat->i[j+1]-amat->i[j]; i++ ) { 1293 sum += PetscAbsScalar(*v); v++; 1294 } 1295 v = bmat->a + bmat->i[j] + shift; 1296 for ( i=0; i<bmat->i[j+1]-bmat->i[j]; i++ ) { 1297 sum += PetscAbsScalar(*v); v++; 1298 } 1299 if (sum > ntemp) ntemp = sum; 1300 } 1301 ierr = MPI_Allreduce(&ntemp,norm,1,MPI_DOUBLE,MPI_MAX,mat->comm);CHKERRQ(ierr); 1302 } else { 1303 SETERRQ(PETSC_ERR_SUP,0,"No support for two norm"); 1304 } 1305 } 1306 PetscFunctionReturn(0); 1307 } 1308 1309 #undef __FUNC__ 1310 #define __FUNC__ "MatTranspose_MPIAIJ" 1311 int MatTranspose_MPIAIJ(Mat A,Mat *matout) 1312 { 1313 Mat_MPIAIJ *a = (Mat_MPIAIJ *) A->data; 1314 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *) a->A->data; 1315 int ierr,shift = Aloc->indexshift; 1316 int M = a->M, N = a->N,m,*ai,*aj,row,*cols,i,*ct; 1317 Mat B; 1318 Scalar *array; 1319 1320 PetscFunctionBegin; 1321 if (matout == PETSC_NULL && M != N) { 1322 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1323 } 1324 1325 ierr = MatCreateMPIAIJ(A->comm,a->n,a->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1326 1327 /* copy over the A part */ 1328 Aloc = (Mat_SeqAIJ*) a->A->data; 1329 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1330 row = a->rstart; 1331 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] += a->cstart + shift;} 1332 for ( i=0; i<m; i++ ) { 1333 ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1334 row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1335 } 1336 aj = Aloc->j; 1337 for ( i=0; i<ai[m]+shift; i++ ) {aj[i] -= a->cstart + shift;} 1338 1339 /* copy over the B part */ 1340 Aloc = (Mat_SeqAIJ*) a->B->data; 1341 m = Aloc->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a; 1342 row = a->rstart; 1343 ct = cols = (int *) PetscMalloc( (1+ai[m]-shift)*sizeof(int) ); CHKPTRQ(cols); 1344 for ( i=0; i<ai[m]+shift; i++ ) {cols[i] = a->garray[aj[i]+shift];} 1345 for ( i=0; i<m; i++ ) { 1346 ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1347 row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1348 } 1349 PetscFree(ct); 1350 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1351 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1352 if (matout != PETSC_NULL) { 1353 *matout = B; 1354 } else { 1355 PetscOps *Abops; 1356 struct _MatOps *Aops; 1357 1358 /* This isn't really an in-place transpose .... 
but free data structures from a */ 1359 PetscFree(a->rowners); 1360 ierr = MatDestroy(a->A); CHKERRQ(ierr); 1361 ierr = MatDestroy(a->B); CHKERRQ(ierr); 1362 if (a->colmap) PetscFree(a->colmap); 1363 if (a->garray) PetscFree(a->garray); 1364 if (a->lvec) VecDestroy(a->lvec); 1365 if (a->Mvctx) VecScatterDestroy(a->Mvctx); 1366 PetscFree(a); 1367 1368 /* 1369 This is horrible, horrible code. We need to keep the 1370 A pointers for the bops and ops but copy everything 1371 else from C. 1372 */ 1373 Abops = A->bops; 1374 Aops = A->ops; 1375 PetscMemcpy(A,B,sizeof(struct _p_Mat)); 1376 A->bops = Abops; 1377 A->ops = Aops; 1378 PetscHeaderDestroy(B); 1379 } 1380 PetscFunctionReturn(0); 1381 } 1382 1383 #undef __FUNC__ 1384 #define __FUNC__ "MatDiagonalScale_MPIAIJ" 1385 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1386 { 1387 Mat_MPIAIJ *aij = (Mat_MPIAIJ *) mat->data; 1388 Mat a = aij->A, b = aij->B; 1389 int ierr,s1,s2,s3; 1390 1391 PetscFunctionBegin; 1392 ierr = MatGetLocalSize(mat,&s2,&s3); CHKERRQ(ierr); 1393 if (rr) { 1394 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1395 if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 1396 /* Overlap communication with computation. */ 1397 ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1398 } 1399 if (ll) { 1400 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1401 if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1402 ierr = (*b->ops->diagonalscale)(b,ll,0); CHKERRQ(ierr); 1403 } 1404 /* scale the diagonal block */ 1405 ierr = (*a->ops->diagonalscale)(a,ll,rr); CHKERRQ(ierr); 1406 1407 if (rr) { 1408 /* Do a scatter end and then right scale the off-diagonal block */ 1409 ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx); CHKERRQ(ierr); 1410 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec); CHKERRQ(ierr); 1411 } 1412 1413 PetscFunctionReturn(0); 1414 } 1415 1416 1417 extern int MatPrintHelp_SeqAIJ(Mat); 1418 #undef __FUNC__ 1419 #define __FUNC__ "MatPrintHelp_MPIAIJ" 1420 int MatPrintHelp_MPIAIJ(Mat A) 1421 { 1422 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1423 int ierr; 1424 1425 PetscFunctionBegin; 1426 if (!a->rank) { 1427 ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr); 1428 } 1429 PetscFunctionReturn(0); 1430 } 1431 1432 #undef __FUNC__ 1433 #define __FUNC__ "MatGetBlockSize_MPIAIJ" 1434 int MatGetBlockSize_MPIAIJ(Mat A,int *bs) 1435 { 1436 PetscFunctionBegin; 1437 *bs = 1; 1438 PetscFunctionReturn(0); 1439 } 1440 #undef __FUNC__ 1441 #define __FUNC__ "MatSetUnfactored_MPIAIJ" 1442 int MatSetUnfactored_MPIAIJ(Mat A) 1443 { 1444 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 1445 int ierr; 1446 1447 PetscFunctionBegin; 1448 ierr = MatSetUnfactored(a->A); CHKERRQ(ierr); 1449 PetscFunctionReturn(0); 1450 } 1451 1452 #undef __FUNC__ 1453 #define __FUNC__ "MatEqual_MPIAIJ" 1454 int MatEqual_MPIAIJ(Mat A, Mat B, PetscTruth *flag) 1455 { 1456 Mat_MPIAIJ *matB = (Mat_MPIAIJ *) B->data,*matA = (Mat_MPIAIJ *) A->data; 1457 Mat a, b, c, d; 1458 PetscTruth flg; 1459 int ierr; 1460 1461 PetscFunctionBegin; 1462 if (B->type != MATMPIAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1463 a = matA->A; b = matA->B; 1464 c = matB->A; d = matB->B; 1465 1466 ierr = MatEqual(a, c, &flg); CHKERRQ(ierr); 1467 if (flg == PETSC_TRUE) { 1468 ierr = MatEqual(b, d, &flg); CHKERRQ(ierr); 1469 } 1470 ierr = MPI_Allreduce(&flg, flag, 1, MPI_INT, MPI_LAND, A->comm);CHKERRQ(ierr); 1471 PetscFunctionReturn(0); 1472 } 1473 1474 extern int 
MatConvertSameType_MPIAIJ(Mat,Mat *,int);
extern int MatIncreaseOverlap_MPIAIJ(Mat , int, IS *, int);
extern int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
extern int MatGetSubMatrices_MPIAIJ (Mat ,int , IS *,IS *,MatGetSubMatrixCall,Mat **);
extern int MatGetSubMatrix_MPIAIJ (Mat ,IS,IS,int,MatGetSubMatrixCall,Mat *);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,MatMultAdd_MPIAIJ,
       MatMultTrans_MPIAIJ,MatMultTransAdd_MPIAIJ,
       0,0,
       0,0,
       0,0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
       MatGetInfo_MPIAIJ,MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,MatDiagonalScale_MPIAIJ,MatNorm_MPIAIJ,
       MatAssemblyBegin_MPIAIJ,MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,MatZeroEntries_MPIAIJ,MatZeroRows_MPIAIJ,
       0,0,0,0,
       MatGetSize_MPIAIJ,MatGetLocalSize_MPIAIJ,MatGetOwnershipRange_MPIAIJ,
       0,0,
       0,0,MatConvertSameType_MPIAIJ,0,0,
       0,0,0,
       MatGetSubMatrices_MPIAIJ,MatIncreaseOverlap_MPIAIJ,MatGetValues_MPIAIJ,0,
       MatPrintHelp_MPIAIJ,
       MatScale_MPIAIJ,0,0,0,
       MatGetBlockSize_MPIAIJ,0,0,0,0,
       MatFDColoringCreate_MPIAIJ,0,MatSetUnfactored_MPIAIJ,
       0,0,MatGetSubMatrix_MPIAIJ};


#undef __FUNC__  
#define __FUNC__ "MatCreateMPIAIJ"
/*@C
   MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Input Parameters:
.  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
.  d_nz - number of nonzeros per row in diagonal portion of local submatrix
          (same for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           diagonal portion of the local submatrix (possibly different for each row)
           or PETSC_NULL. You must leave room for the diagonal entry even if
           it is zero.
.  o_nz - number of nonzeros per row in the off-diagonal portion of local
          submatrix (same for all local rows).
.  o_nnz - array containing the number of nonzeros in the various rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Collective on MPI_Comm

   Notes:
   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage) is fully compatible with standard Fortran 77
   storage.  That is, the stored row and column indices can begin at
   either one (as in Fortran) or zero.  See the users manual for details.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
$    -mat_aij_no_inode  - Do not use inodes
$    -mat_aij_inode_limit <limit> - Set inode limit.
$        (max limit=5)
$    -mat_aij_oneindex - Internally use indexing starting at 1
$        rather than 0.  Note: When calling MatSetValues(),
$        the user still MUST index entries starting at 0!

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

$            0 1 2 3 4 5 6 7 8 9 10 11
$           -------------------
$   row 3  | o o o d d d o o o o o o
$   row 4  | o o o d d d o o o o o o
$   row 5  | o o o d d d o o o o o o
$           -------------------
$

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQAIJ format for compressed row storage.

   Now d_nz should indicate the number of nonzeros per row in the d matrix,
   and o_nz should indicate the number of nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.  For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
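
   Example (an illustrative sketch only; the preallocation values 5 and 2 and the
   use of PETSC_DECIDE are merely examples, and error checking with CHKERRQ() is
   omitted):
$     Mat A;
$     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,
$                            5,PETSC_NULL,2,PETSC_NULL,&A);
$     /* ... set entries with MatSetValues() ... */
$     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
$     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);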

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
@*/
int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat        B;
  Mat_MPIAIJ *b;
  int        ierr, i,sum[2],work[2],size;

  PetscFunctionBegin;
  MPI_Comm_size(comm,&size);
  if (size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    ierr = MatCreateSeqAIJ(comm,M,N,d_nz,d_nnz,A); CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data         = (void *) (b = PetscNew(Mat_MPIAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIAIJ));
  PetscMemcpy(B->ops,&MatOps,sizeof(struct _MatOps));
  B->ops->destroy = MatDestroy_MPIAIJ;
  B->ops->view    = MatView_MPIAIJ;
  B->factor       = 0;
  B->assembled    = PETSC_FALSE;
  B->mapping      = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  MPI_Comm_rank(comm,&b->rank);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) {
    SETERRQ(PETSC_ERR_ARG_WRONG,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }

  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    ierr = MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );CHKERRQ(ierr);
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  if (m == PETSC_DECIDE) {m = M/b->size + ((M % b->size) > b->rank);}
  if (n == PETSC_DECIDE) {n = N/b->size + ((N % b->size) > b->rank);}
  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  b->cowners = b->rowners + b->size + 2;
  ierr = MPI_Allgather(&m,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  ierr = MPI_Allgather(&n,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->donotstash  = 0;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  *A = B;
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatConvertSameType_MPIAIJ"
int MatConvertSameType_MPIAIJ(Mat matin,Mat *newmat,int cpvalues)
{
  Mat        mat;
  Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ *) matin->data;
  int        ierr, len=0, flg;

  PetscFunctionBegin;
  *newmat = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIAIJ,matin->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void *) (a = PetscNew(Mat_MPIAIJ)); CHKPTRQ(a);
  PetscMemcpy(mat->ops,&MatOps,sizeof(struct _MatOps));
  mat->ops->destroy = MatDestroy_MPIAIJ;
  mat->ops->view    = MatView_MPIAIJ;
  mat->factor       = matin->factor;
  mat->assembled    = PETSC_TRUE;

  a->m = mat->m = oldmat->m;
  a->n = mat->n = oldmat->n;
  a->M = mat->M = oldmat->M;
  a->N = mat->N = oldmat->N;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  a->rowners = (int *) PetscMalloc(2*(a->size+2)*sizeof(int)); CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
  a->cowners = a->rowners + a->size + 2;
  PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
  ierr = StashInitialize_Private(&a->stash); CHKERRQ(ierr);
  if (oldmat->colmap) {
    a->colmap = (int *) PetscMalloc((a->N)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->N)*sizeof(int));
    PetscMemcpy(a->colmap,oldmat->colmap,(a->N)*sizeof(int));
  } else a->colmap = 0;
  if (oldmat->garray) {
    len = ((Mat_SeqAIJ *) (oldmat->B->data))->n;
    a->garray = (int *) PetscMalloc((len+1)*sizeof(int)); CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    if (len) PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec); CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx); CHKERRQ(ierr);
  PLogObjectParent(mat,a->Mvctx);
  ierr = MatConvert(oldmat->A,MATSAME,&a->A); CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr = MatConvert(oldmat->B,MATSAME,&a->B); CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg); CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat); CHKERRQ(ierr);
  }
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "sys.h"

#undef __FUNC__
#define __FUNC__ "MatLoad_MPIAIJ"
int MatLoad_MPIAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat        A;
  Scalar     *vals,*svals;
  MPI_Comm   comm = ((PetscObject)viewer)->comm;
  MPI_Status status;
  int        i, nz, ierr, j,rstart, rend, fd;
  int        header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
  int        *ourlens,*sndcounts = 0,*procsnz = 0, *offlens,jj,*mycols,*smycols;
  int        tag = ((PetscObject)viewer)->tag;

  PetscFunctionBegin;
  MPI_Comm_size(comm,&size); MPI_Comm_rank(comm,&rank);
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT); CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix in special format on disk, cannot load as MPIAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows */
  m          = M/size + ((M % size) > rank);
  rowners    = (int *) PetscMalloc((size+2)*sizeof(int)); CHKPTRQ(rowners);
  ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for ( i=2; i<=size; i++ ) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ourlens = (int*) PetscMalloc( 2*(rend-rstart)*sizeof(int) ); CHKPTRQ(ourlens);
  offlens = ourlens + (rend-rstart);
  if (!rank) {
    rowlengths = (int*) PetscMalloc( M*sizeof(int) ); CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr);
    sndcounts = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(sndcounts);
    for ( i=0; i<size; i++ ) sndcounts[i] = rowners[i+1] - rowners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
    PetscFree(sndcounts);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT, 0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*) PetscMalloc( size*sizeof(int) ); CHKPTRQ(procsnz);
    PetscMemzero(procsnz,size*sizeof(int));
    for ( i=0; i<size; i++ ) {
      for ( j=rowners[i]; j< rowners[i+1]; j++ ) {
        procsnz[i] += rowlengths[j];
      }
    }
    PetscFree(rowlengths);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for ( i=0; i<size; i++ ) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int *) PetscMalloc( maxnz*sizeof(int) ); CHKPTRQ(cols);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    mycols = (int *) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);
    ierr   = PetscBinaryRead(fd,mycols,nz,PETSC_INT); CHKERRQ(ierr);

    /* read in everyone else's column indices and ship them off */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT); CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    PetscFree(cols);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for ( i=0; i<m; i++ ) {
      nz += ourlens[i];
    }
    mycols = (int*) PetscMalloc( nz*sizeof(int) ); CHKPTRQ(mycols);

    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off-diagonal entries */
  PetscMemzero(offlens,m*sizeof(int));
  jj = 0;
  for ( i=0; i<m; i++ ) {
    for ( j=0; j<ourlens[i]; j++ ) {
      if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix */
  for ( i=0; i<m; i++ ) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  MatSetOption(A,MAT_COLUMNS_SORTED);
  for ( i=0; i<m; i++ ) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    vals = (Scalar *) PetscMalloc( maxnz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the other processors' values and ship them out */
    for ( i=1; i<size; i++ ) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR); CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    PetscFree(procsnz);
  } else {
    /* receive numeric values */
    vals = (Scalar*) PetscMalloc( nz*sizeof(Scalar) ); CHKPTRQ(vals);

    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for ( i=0; i<m; i++ ) {
      ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  PetscFree(ourlens); PetscFree(vals); PetscFree(mycols); PetscFree(rowners);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNC__
#define __FUNC__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix: first a sequential AIJ
  matrix on each processor, and then the final parallel result formed by
  concatenating those local matrices.  Writing it directly would be much like
  MatGetSubMatrices_MPIAIJ().  (An illustrative usage sketch appears at the end
  of this file.)
*/
int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatGetSubMatrixCall call,Mat *newmat)
{
  int        ierr, i, m,n,rstart,row,rend,nz,*cwork,size,rank,j;
  Mat        *local,M, Mreuse;
  Scalar     *vwork,*aa;
  MPI_Comm   comm = mat->comm;
  Mat_SeqAIJ *aij;
  int        *ii, *jj,nlocal,*dlens,*olens,dlen,olen,jend;

  PetscFunctionBegin;
  MPI_Comm_rank(comm,&rank);
  MPI_Comm_size(comm,&size);

  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(1,1,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    PetscFree(local);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *) (Mreuse)->data;
    if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      nlocal = n/size + ((n % size) > rank);
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ(1,1,"Local column sizes do not add up to total number of columns");
    }

    /* next, compute all the lengths */
    dlens = (int *) PetscMalloc( (2*m+1)*sizeof(int) );CHKPTRQ(dlens);
    olens = dlens + m;
    for ( i=0; i<m; i++ ) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for ( j=0; j<jend; j++ ) {
        if ( *jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
    PetscFree(dlens);
  } else {
    int ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,1,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend); CHKERRQ(ierr);
  aij = (Mat_SeqAIJ *) (Mreuse)->data;
  if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,1,"No support for index shifted matrix");
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES); CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
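
/*
   Illustrative usage sketch for MatGetSubMatrix_MPIAIJ() (included here only as a
   hedged example; mat, isrow, and iscol are hypothetical placeholders, and in
   practice the routine is reached through the generic matrix interface that
   dispatches the MatOps table above):

     Mat sub;
     ierr = MatGetSubMatrix_MPIAIJ(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub); CHKERRQ(ierr);
     ... later, after changing the numerical values of mat but not its nonzero pattern ...
     ierr = MatGetSubMatrix_MPIAIJ(mat,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&sub); CHKERRQ(ierr);

   On the MAT_INITIAL_MATRIX call the routine composes the sequential submatrix onto
   sub under the name "SubMatrix"; that composed object is what allows the
   MAT_REUSE_MATRIX call to refill the same parallel matrix without reallocating it.
*/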